---
# Dataset-mixture configuration: one entry per dataset under `datasets:`.
# Per-entry fields seen in this file:
#   data_type     — input modality for the loader ('frames' or 'images')
#   sample_ratio  — relative sampling weight in the mixture (presumably; verify against consumer)
#   fps           — frame-sampling rate for frame-based datasets
#   conv_type     — conversation format ('single' or 'multi')
#   task_types    — subset of task labels to load (only some datasets set this)
#   train_data_path — absolute path to the annotation JSON (only some datasets set this;
#                     NOTE(review): entries without it presumably have a default path in code — confirm)
datasets:
  # m3it:
  #   data_type: images
  #   sample_ratio: 4
  #   tasks:
  #     - coco
  #     - coco-goi
  #     - coco-text
  #     - imagenet
  #     - coco-itm
  #     - iqa
  #     - mocheg
  #     - vsr
  #     - refcoco
  #     - science-qa
  #     - vqa-v2
  #     - gqa
  #     - st-vqa
  #     - text-vqa
  #     - okvqa
  #     - a-okvqa

  tt_vqa:
    data_type: frames
    sample_ratio: 3
    fps: 2.0
    conv_type: single
    train_data_path: /mnt/bn/algo-masp-nas-2/xiangchen/dataset/masp/20231201_20240322_caption_250k.json

  ShareGPT4V:
    data_type: images
    sample_ratio: 1

  gpt4v_tt_vqa:
    data_type: frames
    fps: 0.5
    sample_ratio: 6
    conv_type: single
    task_types: ['caption']

  # gpt4v_public:
  #   data_type: frames
  #   fps: 1.0
  #   sample_ratio: 10
  #   conv_type: single
  #   task_types: ['summary', 'detail']
  #   train_data_path: /mnt/bn/algo-masp-nas-2/xiangchen/data/shared_gpt4v_data/data_500k_filtered.json

  lk_video:
    data_type: frames
    conv_type: multi
    fps: 1.0
    sample_ratio: 6

  gpt4v_internal:
    data_type: frames
    fps: 2.0
    sample_ratio: 1
    conv_type: single
    task_types: ['detail']