| | import os |
| | import random |
| | from llava.datasets.builder import DATASETS |
| |
|
| | from typing import Dict, Optional, Sequence, List |
| | from llava.datasets.data_cfgs import data_configs |
| | from llava.datasets.base_dataset import FramesTaskDataset |
| | from llava.datasets.prompts import internvid_prompt |
| | from llava.constants import DEFAULT_VIDEO_TOKEN |
| |
|
| |
|
class InternVidDataset(FramesTaskDataset):
    """Frames-task dataset that turns caption annotations into two-turn conversations.

    Each annotation item is expected to carry a ``'caption'`` entry; the
    caption becomes the model's reply to a randomly chosen prompt.
    """

    def __init__(self, anno_path, data_args=None, name='internvid'):
        """Forward the annotation path, data arguments and dataset name to the base class."""
        super().__init__(anno_path=anno_path, data_args=data_args, name=name)

    def text_preprocess(self, item) -> List[Dict[str, str]]:
        """Build the human/model conversation for a single annotation item.

        Args:
            item: Mapping-like annotation record; only ``item['caption']`` is read.

        Returns:
            A two-element list: the human turn (video token followed by a
            prompt drawn with ``random.choice`` from ``internvid_prompt``) and
            the model turn holding the caption.

        Raises:
            KeyError: If ``item`` is a dict without a ``'caption'`` key.
        """
        # Read the caption first so a malformed item fails before the RNG is used.
        answer = item['caption']

        # NOTE(review): presumably internvid_prompt is a non-empty sequence of
        # prompt strings -- confirm in llava.datasets.prompts.
        question = DEFAULT_VIDEO_TOKEN + random.choice(internvid_prompt)

        human_turn = {'from': 'human', 'value': question}
        model_turn = {'from': 'model', 'value': answer}
        return [human_turn, model_turn]
| |
|
| |
|
@DATASETS.register_obj
def internvid(data_args):
    """Create the InternVid dataset from its configured training annotation path.

    Args:
        data_args: Data arguments passed through unchanged to ``InternVidDataset``.

    Returns:
        An ``InternVidDataset`` built from
        ``data_configs["internvid"]['train_data_path']``.
    """
    train_anno_path = data_configs["internvid"]['train_data_path']
    return InternVidDataset(train_anno_path, data_args)
| |
|
| |
|
| |
|
| |
|