# Auto-generated xtuner/mmengine training config (flattened dump).
# De-mangled from a markdown-table paste back into valid Python.
SYSTEM = ''  # no system prompt for this run
accumulative_counts = 256  # gradient accumulation steps
batch_size = 1  # micro-batch size per GPU
# AdamW momentum coefficients
betas = (
    0.9,
    0.999,
)
# Runtime hooks: log dataset stats at startup, and run a fixed
# chat/caption evaluation every `every_n_iters` iterations.
custom_hooks = [
    dict(
        type='xtuner.engine.DatasetInfoHook',
        tokenizer=dict(
            type='transformers.AutoTokenizer.from_pretrained',
            pretrained_model_name_or_path=
            '/mnt/program/wlx_data/ckpt/hf_models/fla-hub/rwkv7-1.5B-world/',
            padding_side='right',
            trust_remote_code=True)),
    dict(
        type='xtuner.engine.EvaluateChatHook',
        every_n_iters=5000,
        evaluation_images='/mnt/program/wlx_data/experiments/assets/view.jpg',
        evaluation_inputs=[
            'Please describe this picture in detail',
            'What should I be cautious about when I visit this place?',
        ],
        system='',
        prompt_template='xtuner.utils.PROMPT_TEMPLATE.vicuna',
        image_processor=dict(
            type='transformers.CLIPImageProcessor.from_pretrained',
            pretrained_model_name_or_path=
            'laion/CLIP-ViT-bigG-14-laion2B-39B-b160k',
            crop_size=384,
            size=384,
            trust_remote_code=True),
        tokenizer=dict(
            type='transformers.AutoTokenizer.from_pretrained',
            pretrained_model_name_or_path=
            '/mnt/program/wlx_data/ckpt/hf_models/fla-hub/rwkv7-1.5B-world/',
            padding_side='right',
            trust_remote_code=True)),
]
# Default (densefusion) data locations and dataloader parallelism.
data_path = '/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/densefusion/filtered_densefusion.jsonl'
data_root = '/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/densefusion/'
dataloader_num_workers = 16
# Standard mmengine runner hooks; checkpoints are saved by iteration
# (every 10k iters, keeping at most 2).
default_hooks = dict(
    checkpoint=dict(
        type='mmengine.hooks.CheckpointHook',
        by_epoch=False,
        interval=10000,
        max_keep_ckpts=2),
    logger=dict(interval=10, type='mmengine.hooks.LoggerHook'),
    param_scheduler=dict(type='mmengine.hooks.ParamSchedulerHook'),
    sampler_seed=dict(type='mmengine.hooks.DistSamplerSeedHook'),
    timer=dict(type='mmengine.hooks.IterTimerHook'))
# Distributed/multiprocessing environment: NCCL backend, fork start method.
env_cfg = dict(
    cudnn_benchmark=False,
    dist_cfg=dict(backend='nccl'),
    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0))
# Periodic qualitative evaluation: one image, two prompts, every 5000 iters.
evaluation_freq = 5000
evaluation_images = '/mnt/program/wlx_data/experiments/assets/view.jpg'
evaluation_inputs = [
    'Please describe this picture in detail',
    'What should I be cautious about when I visit this place?',
]
# NOTE(review): the densefusion image folder points at MiraData frames —
# looks copy-pasted from the miradata split; confirm this is intentional.
image_folder = '/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/densefusion/data/MiraData/frames/'
# Shared CLIP image processor spec (384x384 crops).
image_processor = dict(
    type='transformers.CLIPImageProcessor.from_pretrained',
    pretrained_model_name_or_path='laion/CLIP-ViT-bigG-14-laion2B-39B-b160k',
    crop_size=384,
    size=384,
    trust_remote_code=True)
language_prefix = '/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/'
launcher = 'pytorch'
# Evol instruction split, pre-tokenized offline with the RWKV7 tokenizer.
llava_dataset_1 = dict(
    type='xtuner.dataset.AuroraDataset',
    data_path='/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/evol/evol.jsonl',
    image_folder='/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/evol/',
    offline_processed_text_folder='/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/evol/tokenized-by-rwkv7/tokenized_sandwich_data',
    dataset_map_fn='xtuner.dataset.map_fns.aurora_map_fn',
    max_length=4096,
    pad_image_to_square=True,
    image_processor=dict(
        type='transformers.CLIPImageProcessor.from_pretrained',
        pretrained_model_name_or_path='laion/CLIP-ViT-bigG-14-laion2B-39B-b160k',
        crop_size=384,
        size=384,
        trust_remote_code=True),
    template_map_fn=dict(
        type='xtuner.dataset.map_fns.template_map_fn_factory',
        template='xtuner.utils.PROMPT_TEMPLATE.vicuna'),
    tokenizer=dict(
        type='transformers.AutoTokenizer.from_pretrained',
        pretrained_model_name_or_path='/mnt/program/wlx_data/ckpt/hf_models/fla-hub/rwkv7-1.5B-world/',
        padding_side='right',
        trust_remote_code=True))
# ShareGPT4Video QA split; no offline-tokenized cache (tokenizes at load).
llava_dataset_10 = dict(
    type='xtuner.dataset.AuroraDataset',
    data_path='/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/sharegpt4video_qa/sharegpt4video_qa.jsonl',
    image_folder='/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/sharegpt4video_qa/data/ShareGPT4Video-raw/vqa_frames/',
    dataset_map_fn='xtuner.dataset.map_fns.aurora_map_fn',
    max_length=4096,
    pad_image_to_square=True,
    image_processor=dict(
        type='transformers.CLIPImageProcessor.from_pretrained',
        pretrained_model_name_or_path='laion/CLIP-ViT-bigG-14-laion2B-39B-b160k',
        crop_size=384,
        size=384,
        trust_remote_code=True),
    template_map_fn=dict(
        type='xtuner.dataset.map_fns.template_map_fn_factory',
        template='xtuner.utils.PROMPT_TEMPLATE.vicuna'),
    tokenizer=dict(
        type='transformers.AutoTokenizer.from_pretrained',
        pretrained_model_name_or_path='/mnt/program/wlx_data/ckpt/hf_models/fla-hub/rwkv7-1.5B-world/',
        padding_side='right',
        trust_remote_code=True))
# M4-Instruct split, pre-tokenized offline with the RWKV7 tokenizer.
llava_dataset_11 = dict(
    type='xtuner.dataset.AuroraDataset',
    data_path='/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/m4/filtered_m4.jsonl',
    image_folder='/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/m4/data/M4-Instruct-Data/images/',
    offline_processed_text_folder='/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/m4/data/M4-Instruct-Data/images/tokenized-by-rwkv7/tokenized_sandwich_data',
    dataset_map_fn='xtuner.dataset.map_fns.aurora_map_fn',
    max_length=4096,
    pad_image_to_square=True,
    image_processor=dict(
        type='transformers.CLIPImageProcessor.from_pretrained',
        pretrained_model_name_or_path='laion/CLIP-ViT-bigG-14-laion2B-39B-b160k',
        crop_size=384,
        size=384,
        trust_remote_code=True),
    template_map_fn=dict(
        type='xtuner.dataset.map_fns.template_map_fn_factory',
        template='xtuner.utils.PROMPT_TEMPLATE.vicuna'),
    tokenizer=dict(
        type='transformers.AutoTokenizer.from_pretrained',
        pretrained_model_name_or_path='/mnt/program/wlx_data/ckpt/hf_models/fla-hub/rwkv7-1.5B-world/',
        padding_side='right',
        trust_remote_code=True))
# ShareGPT4Video caption split.
# NOTE(review): this is one of only two datasets on the /data1/exs_data
# mount (all others use /mnt/program/wlx_data) — confirm it exists here.
llava_dataset_12 = dict(
    type='xtuner.dataset.AuroraDataset',
    data_path='/data1/exs_data/datasets/AuroraCap-trainset/language_sharegpt4video_caption/language_sharegpt4video_caption.jsonl',
    image_folder='/data1/exs_data/datasets/AuroraCap-trainset/language_sharegpt4video_caption/data/ShareGPT4Video-raw/caption_frames/',
    dataset_map_fn='xtuner.dataset.map_fns.aurora_map_fn',
    max_length=4096,
    pad_image_to_square=True,
    image_processor=dict(
        type='transformers.CLIPImageProcessor.from_pretrained',
        pretrained_model_name_or_path='laion/CLIP-ViT-bigG-14-laion2B-39B-b160k',
        crop_size=384,
        size=384,
        trust_remote_code=True),
    template_map_fn=dict(
        type='xtuner.dataset.map_fns.template_map_fn_factory',
        template='xtuner.utils.PROMPT_TEMPLATE.vicuna'),
    tokenizer=dict(
        type='transformers.AutoTokenizer.from_pretrained',
        pretrained_model_name_or_path='/mnt/program/wlx_data/ckpt/hf_models/fla-hub/rwkv7-1.5B-world/',
        padding_side='right',
        trust_remote_code=True))
# MiraData caption split, pre-tokenized offline with the RWKV7 tokenizer.
llava_dataset_13 = dict(
    type='xtuner.dataset.AuroraDataset',
    data_path='/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/miradata/filtered_miradata.jsonl',
    image_folder='/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/miradata/data/MiraData/frames/',
    offline_processed_text_folder='/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/miradata/data/MiraData/frames/tokenized-by-rwkv7/tokenized_sandwich_data',
    dataset_map_fn='xtuner.dataset.map_fns.aurora_map_fn',
    max_length=4096,
    pad_image_to_square=True,
    image_processor=dict(
        type='transformers.CLIPImageProcessor.from_pretrained',
        pretrained_model_name_or_path='laion/CLIP-ViT-bigG-14-laion2B-39B-b160k',
        crop_size=384,
        size=384,
        trust_remote_code=True),
    template_map_fn=dict(
        type='xtuner.dataset.map_fns.template_map_fn_factory',
        template='xtuner.utils.PROMPT_TEMPLATE.vicuna'),
    tokenizer=dict(
        type='transformers.AutoTokenizer.from_pretrained',
        pretrained_model_name_or_path='/mnt/program/wlx_data/ckpt/hf_models/fla-hub/rwkv7-1.5B-world/',
        padding_side='right',
        trust_remote_code=True))
# ShareGPT4V split, with an offline-tokenized cache.
# NOTE(review): /data1/exs_data mount and a plain 'tokenized_data' cache
# (not 'tokenized-by-rwkv7/...') — differs from every other split; verify
# the cache was produced with the same RWKV7 tokenizer.
llava_dataset_14 = dict(
    type='xtuner.dataset.AuroraDataset',
    data_path='/data1/exs_data/datasets/AuroraCap-trainset/language_sharegpt4v/language_sharegpt4v.jsonl',
    image_folder='/data1/exs_data/datasets/AuroraCap-trainset/language_sharegpt4v/data/',
    offline_processed_text_folder='/data1/exs_data/datasets/AuroraCap-trainset/language_sharegpt4v/data/tokenized_data',
    dataset_map_fn='xtuner.dataset.map_fns.aurora_map_fn',
    max_length=4096,
    pad_image_to_square=True,
    image_processor=dict(
        type='transformers.CLIPImageProcessor.from_pretrained',
        pretrained_model_name_or_path='laion/CLIP-ViT-bigG-14-laion2B-39B-b160k',
        crop_size=384,
        size=384,
        trust_remote_code=True),
    template_map_fn=dict(
        type='xtuner.dataset.map_fns.template_map_fn_factory',
        template='xtuner.utils.PROMPT_TEMPLATE.vicuna'),
    tokenizer=dict(
        type='transformers.AutoTokenizer.from_pretrained',
        pretrained_model_name_or_path='/mnt/program/wlx_data/ckpt/hf_models/fla-hub/rwkv7-1.5B-world/',
        padding_side='right',
        trust_remote_code=True))
# DenseFusion split; no offline-tokenized cache.
# NOTE(review): image folder is '.../densefusion/data/MiraData/frames/' —
# MiraData frames under densefusion looks copy-pasted; confirm.
llava_dataset_15 = dict(
    type='xtuner.dataset.AuroraDataset',
    data_path='/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/densefusion/filtered_densefusion.jsonl',
    image_folder='/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/densefusion/data/MiraData/frames/',
    dataset_map_fn='xtuner.dataset.map_fns.aurora_map_fn',
    max_length=4096,
    pad_image_to_square=True,
    image_processor=dict(
        type='transformers.CLIPImageProcessor.from_pretrained',
        pretrained_model_name_or_path='laion/CLIP-ViT-bigG-14-laion2B-39B-b160k',
        crop_size=384,
        size=384,
        trust_remote_code=True),
    template_map_fn=dict(
        type='xtuner.dataset.map_fns.template_map_fn_factory',
        template='xtuner.utils.PROMPT_TEMPLATE.vicuna'),
    tokenizer=dict(
        type='transformers.AutoTokenizer.from_pretrained',
        pretrained_model_name_or_path='/mnt/program/wlx_data/ckpt/hf_models/fla-hub/rwkv7-1.5B-world/',
        padding_side='right',
        trust_remote_code=True))
# FaceCaption-15M split, pre-tokenized offline with the RWKV7 tokenizer.
llava_dataset_2 = dict(
    type='xtuner.dataset.AuroraDataset',
    data_path='/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/facecaption/filtered_facecaption.jsonl',
    image_folder='/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/facecaption/data/FaceCaption-15M/images/',
    offline_processed_text_folder='/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/facecaption/data/FaceCaption-15M/images/tokenized-by-rwkv7/tokenized_sandwich_data',
    dataset_map_fn='xtuner.dataset.map_fns.aurora_map_fn',
    max_length=4096,
    pad_image_to_square=True,
    image_processor=dict(
        type='transformers.CLIPImageProcessor.from_pretrained',
        pretrained_model_name_or_path='laion/CLIP-ViT-bigG-14-laion2B-39B-b160k',
        crop_size=384,
        size=384,
        trust_remote_code=True),
    template_map_fn=dict(
        type='xtuner.dataset.map_fns.template_map_fn_factory',
        template='xtuner.utils.PROMPT_TEMPLATE.vicuna'),
    tokenizer=dict(
        type='transformers.AutoTokenizer.from_pretrained',
        pretrained_model_name_or_path='/mnt/program/wlx_data/ckpt/hf_models/fla-hub/rwkv7-1.5B-world/',
        padding_side='right',
        trust_remote_code=True))
# LLaVA-NeXT split, pre-tokenized offline with the RWKV7 tokenizer.
llava_dataset_3 = dict(
    type='xtuner.dataset.AuroraDataset',
    data_path='/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/llavanext/filtered_llavanext.jsonl',
    image_folder='/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/llavanext/data/LLaVA-NeXT-Data/images/',
    offline_processed_text_folder='/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/llavanext/data/LLaVA-NeXT-Data/images/tokenized-by-rwkv7/tokenized_sandwich_data',
    dataset_map_fn='xtuner.dataset.map_fns.aurora_map_fn',
    max_length=4096,
    pad_image_to_square=True,
    image_processor=dict(
        type='transformers.CLIPImageProcessor.from_pretrained',
        pretrained_model_name_or_path='laion/CLIP-ViT-bigG-14-laion2B-39B-b160k',
        crop_size=384,
        size=384,
        trust_remote_code=True),
    template_map_fn=dict(
        type='xtuner.dataset.map_fns.template_map_fn_factory',
        template='xtuner.utils.PROMPT_TEMPLATE.vicuna'),
    tokenizer=dict(
        type='transformers.AutoTokenizer.from_pretrained',
        pretrained_model_name_or_path='/mnt/program/wlx_data/ckpt/hf_models/fla-hub/rwkv7-1.5B-world/',
        padding_side='right',
        trust_remote_code=True))
# ALLaVA-4V caption (VFLAN) split, pre-tokenized offline.
llava_dataset_4 = dict(
    type='xtuner.dataset.AuroraDataset',
    data_path='/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/allava_caption_vflan/filtered_allava_caption_vflan.jsonl',
    image_folder='/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/allava_caption_vflan/data/ALLaVA-4V/',
    offline_processed_text_folder='/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/allava_caption_vflan/data/ALLaVA-4V/tokenized-by-rwkv7/tokenized_sandwich_data',
    dataset_map_fn='xtuner.dataset.map_fns.aurora_map_fn',
    max_length=4096,
    pad_image_to_square=True,
    image_processor=dict(
        type='transformers.CLIPImageProcessor.from_pretrained',
        pretrained_model_name_or_path='laion/CLIP-ViT-bigG-14-laion2B-39B-b160k',
        crop_size=384,
        size=384,
        trust_remote_code=True),
    template_map_fn=dict(
        type='xtuner.dataset.map_fns.template_map_fn_factory',
        template='xtuner.utils.PROMPT_TEMPLATE.vicuna'),
    tokenizer=dict(
        type='transformers.AutoTokenizer.from_pretrained',
        pretrained_model_name_or_path='/mnt/program/wlx_data/ckpt/hf_models/fla-hub/rwkv7-1.5B-world/',
        padding_side='right',
        trust_remote_code=True))
# ALLaVA-4V instruct (VFLAN) split, pre-tokenized offline.
llava_dataset_5 = dict(
    type='xtuner.dataset.AuroraDataset',
    data_path='/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/allava_instruct_vflan/filtered_allava_instruct_vflan.jsonl',
    image_folder='/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/allava_instruct_vflan/data/ALLaVA-4V/',
    offline_processed_text_folder='/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/allava_instruct_vflan/data/ALLaVA-4V/tokenized-by-rwkv7/tokenized_sandwich_data',
    dataset_map_fn='xtuner.dataset.map_fns.aurora_map_fn',
    max_length=4096,
    pad_image_to_square=True,
    image_processor=dict(
        type='transformers.CLIPImageProcessor.from_pretrained',
        pretrained_model_name_or_path='laion/CLIP-ViT-bigG-14-laion2B-39B-b160k',
        crop_size=384,
        size=384,
        trust_remote_code=True),
    template_map_fn=dict(
        type='xtuner.dataset.map_fns.template_map_fn_factory',
        template='xtuner.utils.PROMPT_TEMPLATE.vicuna'),
    tokenizer=dict(
        type='transformers.AutoTokenizer.from_pretrained',
        pretrained_model_name_or_path='/mnt/program/wlx_data/ckpt/hf_models/fla-hub/rwkv7-1.5B-world/',
        padding_side='right',
        trust_remote_code=True))
# LLaVA-Instruct-150K mix split, pre-tokenized offline.
llava_dataset_6 = dict(
    type='xtuner.dataset.AuroraDataset',
    data_path='/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/llava_mix/filtered_llava_mix.jsonl',
    image_folder='/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/llava_mix/data/LLaVA-Instruct-150K/images/',
    offline_processed_text_folder='/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/llava_mix/data/LLaVA-Instruct-150K/images/tokenized-by-rwkv7/tokenized_sandwich_data',
    dataset_map_fn='xtuner.dataset.map_fns.aurora_map_fn',
    max_length=4096,
    pad_image_to_square=True,
    image_processor=dict(
        type='transformers.CLIPImageProcessor.from_pretrained',
        pretrained_model_name_or_path='laion/CLIP-ViT-bigG-14-laion2B-39B-b160k',
        crop_size=384,
        size=384,
        trust_remote_code=True),
    template_map_fn=dict(
        type='xtuner.dataset.map_fns.template_map_fn_factory',
        template='xtuner.utils.PROMPT_TEMPLATE.vicuna'),
    tokenizer=dict(
        type='transformers.AutoTokenizer.from_pretrained',
        pretrained_model_name_or_path='/mnt/program/wlx_data/ckpt/hf_models/fla-hub/rwkv7-1.5B-world/',
        padding_side='right',
        trust_remote_code=True))
# ALLaVA-4V caption (LAION) split, pre-tokenized offline.
llava_dataset_7 = dict(
    type='xtuner.dataset.AuroraDataset',
    data_path='/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/allava_caption_laion/filtered_allava_caption_laion.jsonl',
    image_folder='/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/allava_caption_laion/data/ALLaVA-4V/',
    offline_processed_text_folder='/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/allava_caption_laion/data/ALLaVA-4V/tokenized-by-rwkv7/tokenized_sandwich_data',
    dataset_map_fn='xtuner.dataset.map_fns.aurora_map_fn',
    max_length=4096,
    pad_image_to_square=True,
    image_processor=dict(
        type='transformers.CLIPImageProcessor.from_pretrained',
        pretrained_model_name_or_path='laion/CLIP-ViT-bigG-14-laion2B-39B-b160k',
        crop_size=384,
        size=384,
        trust_remote_code=True),
    template_map_fn=dict(
        type='xtuner.dataset.map_fns.template_map_fn_factory',
        template='xtuner.utils.PROMPT_TEMPLATE.vicuna'),
    tokenizer=dict(
        type='transformers.AutoTokenizer.from_pretrained',
        pretrained_model_name_or_path='/mnt/program/wlx_data/ckpt/hf_models/fla-hub/rwkv7-1.5B-world/',
        padding_side='right',
        trust_remote_code=True))
# ALLaVA-4V instruct (LAION) split, pre-tokenized offline.
llava_dataset_8 = dict(
    type='xtuner.dataset.AuroraDataset',
    data_path='/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/allava_instruct_laion/filtered_allava_instruct_laion.jsonl',
    image_folder='/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/allava_instruct_laion/data/ALLaVA-4V/',
    offline_processed_text_folder='/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/allava_instruct_laion/data/ALLaVA-4V/tokenized-by-rwkv7/tokenized_sandwich_data',
    dataset_map_fn='xtuner.dataset.map_fns.aurora_map_fn',
    max_length=4096,
    pad_image_to_square=True,
    image_processor=dict(
        type='transformers.CLIPImageProcessor.from_pretrained',
        pretrained_model_name_or_path='laion/CLIP-ViT-bigG-14-laion2B-39B-b160k',
        crop_size=384,
        size=384,
        trust_remote_code=True),
    template_map_fn=dict(
        type='xtuner.dataset.map_fns.template_map_fn_factory',
        template='xtuner.utils.PROMPT_TEMPLATE.vicuna'),
    tokenizer=dict(
        type='transformers.AutoTokenizer.from_pretrained',
        pretrained_model_name_or_path='/mnt/program/wlx_data/ckpt/hf_models/fla-hub/rwkv7-1.5B-world/',
        padding_side='right',
        trust_remote_code=True))
# Cambrian-10M split; no offline-tokenized cache.
llava_dataset_9 = dict(
    type='xtuner.dataset.AuroraDataset',
    data_path='/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/cambrian/filtered_cambrian.jsonl',
    image_folder='/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/cambrian/data/Cambrian-10M/images/',
    dataset_map_fn='xtuner.dataset.map_fns.aurora_map_fn',
    max_length=4096,
    pad_image_to_square=True,
    image_processor=dict(
        type='transformers.CLIPImageProcessor.from_pretrained',
        pretrained_model_name_or_path='laion/CLIP-ViT-bigG-14-laion2B-39B-b160k',
        crop_size=384,
        size=384,
        trust_remote_code=True),
    template_map_fn=dict(
        type='xtuner.dataset.map_fns.template_map_fn_factory',
        template='xtuner.utils.PROMPT_TEMPLATE.vicuna'),
    tokenizer=dict(
        type='transformers.AutoTokenizer.from_pretrained',
        pretrained_model_name_or_path='/mnt/program/wlx_data/ckpt/hf_models/fla-hub/rwkv7-1.5B-world/',
        padding_side='right',
        trust_remote_code=True))
llava_ov_strategy = False  # LLaVA-OneVision strategy disabled
llm_name_or_path = '/mnt/program/wlx_data/ckpt/hf_models/fla-hub/rwkv7-1.5B-world/'
load_from = None  # no checkpoint to warm-start from
log_level = 'INFO'
lr = 8e-05
max_epochs = 1
max_length = 4096  # max token length per training sample
max_norm = 1  # gradient clipping norm
# AuroraModel: RWKV7-1.5B LLM + SigLIP-style visual encoder, fully
# unfrozen; projector and encoder warm-started from a prior vision run.
model = dict(
    type='xtuner.model.AuroraModel',
    freeze_llm=False,
    freeze_proj=False,
    freeze_visual_encoder=False,
    llava_ov_strategy=False,
    slowfast=False,
    pretrained_pth='/mnt/program/wlx_data/experiments/vision-aurora-rwkv7-0315/iter_36290.hf/projector',
    llm=dict(
        type='transformers.AutoModelForCausalLM.from_pretrained',
        pretrained_model_name_or_path='/mnt/program/wlx_data/ckpt/hf_models/fla-hub/rwkv7-1.5B-world/',
        torch_dtype='torch.float16',
        quantization_config=None,
        trust_remote_code=True),
    visual_encoder=dict(
        type='xtuner.model.aurora.AuroraSigEncoder.from_pretrained',
        pretrained_model_name_or_path='/mnt/program/wlx_data/experiments/vision-aurora-rwkv7-0315/iter_36290.hf/visual_encoder',
        order='descending',
        sort_after_merge=True,
        visual_token_merge_ratio=1.0))  # 1.0 = no token merging
# AdamW wrapped by DeepSpeed (clipping/accumulation handled by the strategy).
optim_type = 'torch.optim.AdamW'
optim_wrapper = dict(
    type='DeepSpeedOptimWrapper',
    optimizer=dict(
        type='torch.optim.AdamW',
        lr=8e-05,
        betas=(
            0.9,
            0.999,
        ),
        weight_decay=0))
# LR schedule: linear warmup over the first 3% of the (single) epoch,
# then cosine annealing down to 0 for the remainder.
param_scheduler = [
    dict(
        type='mmengine.optim.LinearLR',
        start_factor=1e-05,
        begin=0,
        end=0.03,
        by_epoch=True,
        convert_to_iter_based=True),
    dict(
        type='mmengine.optim.CosineAnnealingLR',
        T_max=1,
        eta_min=0.0,
        begin=0.03,
        by_epoch=True,
        convert_to_iter_based=True),
]
pretrained_pth = '/mnt/program/wlx_data/experiments/vision-aurora-rwkv7-0315/iter_36290.hf/projector'
prompt_template = 'xtuner.utils.PROMPT_TEMPLATE.vicuna'
randomness = dict(deterministic=False, seed=None)  # non-deterministic run, random seed
resume = False
runner_type = 'FlexibleRunner'
save_steps = 10000
save_total_limit = 2  # keep at most 2 checkpoints
size = 384  # image size fed to the CLIP processor
slowfast = False
# DeepSpeed ZeRO-1 with bf16; the 'auto' entries inside `config` are
# resolved from the explicit accumulation/clipping/micro-batch values.
strategy = dict(
    type='xtuner.engine.DeepSpeedStrategy',
    config=dict(
        bf16=dict(enabled=True),
        fp16=dict(enabled=False, initial_scale_power=16),
        gradient_accumulation_steps='auto',
        gradient_clipping='auto',
        train_micro_batch_size_per_gpu='auto',
        zero_allow_untested_optimizer=True,
        zero_force_ds_cpu_optimizer=False,
        zero_optimization=dict(overlap_comm=True, stage=1)),
    exclude_frozen_parameters=True,
    gradient_accumulation_steps=256,
    gradient_clipping=1,
    sequence_parallel_size=1,
    train_micro_batch_size_per_gpu=1)
# Shared RWKV7 tokenizer spec (right padding for causal LM training).
tokenizer = dict(
    type='transformers.AutoTokenizer.from_pretrained',
    pretrained_model_name_or_path='/mnt/program/wlx_data/ckpt/hf_models/fla-hub/rwkv7-1.5B-world/',
    padding_side='right',
    trust_remote_code=True)
# Single-epoch, epoch-based training loop.
train_cfg = dict(by_epoch=True, max_epochs=1, val_interval=1)
| train_dataloader = dict( | |
| batch_size=1, | |
| collate_fn=dict(type='xtuner.dataset.collate_fns.default_collate_fn'), | |
| dataset=dict( | |
| datasets=[ | |
| dict( | |
| data_path= | |
| '/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/allava_caption_vflan/filtered_allava_caption_vflan.jsonl', | |
| dataset_map_fn='xtuner.dataset.map_fns.aurora_map_fn', | |
| image_folder= | |
| '/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/allava_caption_vflan/data/ALLaVA-4V/', | |
| image_processor=dict( | |
| crop_size=384, | |
| pretrained_model_name_or_path= | |
| 'laion/CLIP-ViT-bigG-14-laion2B-39B-b160k', | |
| size=384, | |
| trust_remote_code=True, | |
| type='transformers.CLIPImageProcessor.from_pretrained'), | |
| max_length=4096, | |
| offline_processed_text_folder= | |
| '/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/allava_caption_vflan/data/ALLaVA-4V/tokenized-by-rwkv7/tokenized_sandwich_data', | |
| pad_image_to_square=True, | |
| template_map_fn=dict( | |
| template='xtuner.utils.PROMPT_TEMPLATE.vicuna', | |
| type='xtuner.dataset.map_fns.template_map_fn_factory'), | |
| tokenizer=dict( | |
| padding_side='right', | |
| pretrained_model_name_or_path= | |
| '/mnt/program/wlx_data/ckpt/hf_models/fla-hub/rwkv7-1.5B-world/', | |
| trust_remote_code=True, | |
| type='transformers.AutoTokenizer.from_pretrained'), | |
| type='xtuner.dataset.AuroraDataset'), | |
| dict( | |
| data_path= | |
| '/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/allava_instruct_vflan/filtered_allava_instruct_vflan.jsonl', | |
| dataset_map_fn='xtuner.dataset.map_fns.aurora_map_fn', | |
| image_folder= | |
| '/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/allava_instruct_vflan/data/ALLaVA-4V/', | |
| image_processor=dict( | |
| crop_size=384, | |
| pretrained_model_name_or_path= | |
| 'laion/CLIP-ViT-bigG-14-laion2B-39B-b160k', | |
| size=384, | |
| trust_remote_code=True, | |
| type='transformers.CLIPImageProcessor.from_pretrained'), | |
| max_length=4096, | |
| offline_processed_text_folder= | |
| '/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/allava_instruct_vflan/data/ALLaVA-4V/tokenized-by-rwkv7/tokenized_sandwich_data', | |
| pad_image_to_square=True, | |
| template_map_fn=dict( | |
| template='xtuner.utils.PROMPT_TEMPLATE.vicuna', | |
| type='xtuner.dataset.map_fns.template_map_fn_factory'), | |
| tokenizer=dict( | |
| padding_side='right', | |
| pretrained_model_name_or_path= | |
| '/mnt/program/wlx_data/ckpt/hf_models/fla-hub/rwkv7-1.5B-world/', | |
| trust_remote_code=True, | |
| type='transformers.AutoTokenizer.from_pretrained'), | |
| type='xtuner.dataset.AuroraDataset'), | |
| dict( | |
| data_path= | |
| '/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/llava_mix/filtered_llava_mix.jsonl', | |
| dataset_map_fn='xtuner.dataset.map_fns.aurora_map_fn', | |
| image_folder= | |
| '/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/llava_mix/data/LLaVA-Instruct-150K/images/', | |
| image_processor=dict( | |
| crop_size=384, | |
| pretrained_model_name_or_path= | |
| 'laion/CLIP-ViT-bigG-14-laion2B-39B-b160k', | |
| size=384, | |
| trust_remote_code=True, | |
| type='transformers.CLIPImageProcessor.from_pretrained'), | |
| max_length=4096, | |
| offline_processed_text_folder= | |
| '/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/llava_mix/data/LLaVA-Instruct-150K/images/tokenized-by-rwkv7/tokenized_sandwich_data', | |
| pad_image_to_square=True, | |
| template_map_fn=dict( | |
| template='xtuner.utils.PROMPT_TEMPLATE.vicuna', | |
| type='xtuner.dataset.map_fns.template_map_fn_factory'), | |
| tokenizer=dict( | |
| padding_side='right', | |
| pretrained_model_name_or_path= | |
| '/mnt/program/wlx_data/ckpt/hf_models/fla-hub/rwkv7-1.5B-world/', | |
| trust_remote_code=True, | |
| type='transformers.AutoTokenizer.from_pretrained'), | |
| type='xtuner.dataset.AuroraDataset'), | |
| dict( | |
| data_path= | |
| '/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/allava_caption_laion/filtered_allava_caption_laion.jsonl', | |
| dataset_map_fn='xtuner.dataset.map_fns.aurora_map_fn', | |
| image_folder= | |
| '/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/allava_caption_laion/data/ALLaVA-4V/', | |
| image_processor=dict( | |
| crop_size=384, | |
| pretrained_model_name_or_path= | |
| 'laion/CLIP-ViT-bigG-14-laion2B-39B-b160k', | |
| size=384, | |
| trust_remote_code=True, | |
| type='transformers.CLIPImageProcessor.from_pretrained'), | |
| max_length=4096, | |
| offline_processed_text_folder= | |
| '/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/allava_caption_laion/data/ALLaVA-4V/tokenized-by-rwkv7/tokenized_sandwich_data', | |
| pad_image_to_square=True, | |
| template_map_fn=dict( | |
| template='xtuner.utils.PROMPT_TEMPLATE.vicuna', | |
| type='xtuner.dataset.map_fns.template_map_fn_factory'), | |
| tokenizer=dict( | |
| padding_side='right', | |
| pretrained_model_name_or_path= | |
| '/mnt/program/wlx_data/ckpt/hf_models/fla-hub/rwkv7-1.5B-world/', | |
| trust_remote_code=True, | |
| type='transformers.AutoTokenizer.from_pretrained'), | |
| type='xtuner.dataset.AuroraDataset'), | |
| dict( | |
| data_path= | |
| '/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/allava_instruct_laion/filtered_allava_instruct_laion.jsonl', | |
| dataset_map_fn='xtuner.dataset.map_fns.aurora_map_fn', | |
| image_folder= | |
| '/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/allava_instruct_laion/data/ALLaVA-4V/', | |
| image_processor=dict( | |
| crop_size=384, | |
| pretrained_model_name_or_path= | |
| 'laion/CLIP-ViT-bigG-14-laion2B-39B-b160k', | |
| size=384, | |
| trust_remote_code=True, | |
| type='transformers.CLIPImageProcessor.from_pretrained'), | |
| max_length=4096, | |
| offline_processed_text_folder= | |
| '/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/allava_instruct_laion/data/ALLaVA-4V/tokenized-by-rwkv7/tokenized_sandwich_data', | |
| pad_image_to_square=True, | |
| template_map_fn=dict( | |
| template='xtuner.utils.PROMPT_TEMPLATE.vicuna', | |
| type='xtuner.dataset.map_fns.template_map_fn_factory'), | |
| tokenizer=dict( | |
| padding_side='right', | |
| pretrained_model_name_or_path= | |
| '/mnt/program/wlx_data/ckpt/hf_models/fla-hub/rwkv7-1.5B-world/', | |
| trust_remote_code=True, | |
| type='transformers.AutoTokenizer.from_pretrained'), | |
| type='xtuner.dataset.AuroraDataset'), | |
| dict( | |
| data_path= | |
| '/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/m4/filtered_m4.jsonl', | |
| dataset_map_fn='xtuner.dataset.map_fns.aurora_map_fn', | |
| image_folder= | |
| '/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/m4/data/M4-Instruct-Data/images/', | |
| image_processor=dict( | |
| crop_size=384, | |
| pretrained_model_name_or_path= | |
| 'laion/CLIP-ViT-bigG-14-laion2B-39B-b160k', | |
| size=384, | |
| trust_remote_code=True, | |
| type='transformers.CLIPImageProcessor.from_pretrained'), | |
| max_length=4096, | |
| offline_processed_text_folder= | |
| '/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/m4/data/M4-Instruct-Data/images/tokenized-by-rwkv7/tokenized_sandwich_data', | |
| pad_image_to_square=True, | |
| template_map_fn=dict( | |
| template='xtuner.utils.PROMPT_TEMPLATE.vicuna', | |
| type='xtuner.dataset.map_fns.template_map_fn_factory'), | |
| tokenizer=dict( | |
| padding_side='right', | |
| pretrained_model_name_or_path= | |
| '/mnt/program/wlx_data/ckpt/hf_models/fla-hub/rwkv7-1.5B-world/', | |
| trust_remote_code=True, | |
| type='transformers.AutoTokenizer.from_pretrained'), | |
| type='xtuner.dataset.AuroraDataset'), | |
| dict( | |
| data_path= | |
| '/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/evol/evol.jsonl', | |
| dataset_map_fn='xtuner.dataset.map_fns.aurora_map_fn', | |
| image_folder= | |
| '/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/evol/', | |
| image_processor=dict( | |
| crop_size=384, | |
| pretrained_model_name_or_path= | |
| 'laion/CLIP-ViT-bigG-14-laion2B-39B-b160k', | |
| size=384, | |
| trust_remote_code=True, | |
| type='transformers.CLIPImageProcessor.from_pretrained'), | |
| max_length=4096, | |
| offline_processed_text_folder= | |
| '/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/evol/tokenized-by-rwkv7/tokenized_sandwich_data', | |
| pad_image_to_square=True, | |
| template_map_fn=dict( | |
| template='xtuner.utils.PROMPT_TEMPLATE.vicuna', | |
| type='xtuner.dataset.map_fns.template_map_fn_factory'), | |
| tokenizer=dict( | |
| padding_side='right', | |
| pretrained_model_name_or_path= | |
| '/mnt/program/wlx_data/ckpt/hf_models/fla-hub/rwkv7-1.5B-world/', | |
| trust_remote_code=True, | |
| type='transformers.AutoTokenizer.from_pretrained'), | |
| type='xtuner.dataset.AuroraDataset'), | |
| dict( | |
| data_path= | |
| '/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/llavanext/filtered_llavanext.jsonl', | |
| dataset_map_fn='xtuner.dataset.map_fns.aurora_map_fn', | |
| image_folder= | |
| '/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/llavanext/data/LLaVA-NeXT-Data/images/', | |
| image_processor=dict( | |
| crop_size=384, | |
| pretrained_model_name_or_path= | |
| 'laion/CLIP-ViT-bigG-14-laion2B-39B-b160k', | |
| size=384, | |
| trust_remote_code=True, | |
| type='transformers.CLIPImageProcessor.from_pretrained'), | |
| max_length=4096, | |
| offline_processed_text_folder= | |
| '/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/llavanext/data/LLaVA-NeXT-Data/images/tokenized-by-rwkv7/tokenized_sandwich_data', | |
| pad_image_to_square=True, | |
| template_map_fn=dict( | |
| template='xtuner.utils.PROMPT_TEMPLATE.vicuna', | |
| type='xtuner.dataset.map_fns.template_map_fn_factory'), | |
| tokenizer=dict( | |
| padding_side='right', | |
| pretrained_model_name_or_path= | |
| '/mnt/program/wlx_data/ckpt/hf_models/fla-hub/rwkv7-1.5B-world/', | |
| trust_remote_code=True, | |
| type='transformers.AutoTokenizer.from_pretrained'), | |
| type='xtuner.dataset.AuroraDataset'), | |
| dict( | |
| data_path= | |
| '/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/miradata/filtered_miradata.jsonl', | |
| dataset_map_fn='xtuner.dataset.map_fns.aurora_map_fn', | |
| image_folder= | |
| '/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/miradata/data/MiraData/frames/', | |
| image_processor=dict( | |
| crop_size=384, | |
| pretrained_model_name_or_path= | |
| 'laion/CLIP-ViT-bigG-14-laion2B-39B-b160k', | |
| size=384, | |
| trust_remote_code=True, | |
| type='transformers.CLIPImageProcessor.from_pretrained'), | |
| max_length=4096, | |
| offline_processed_text_folder= | |
| '/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/miradata/data/MiraData/frames/tokenized-by-rwkv7/tokenized_sandwich_data', | |
| pad_image_to_square=True, | |
| template_map_fn=dict( | |
| template='xtuner.utils.PROMPT_TEMPLATE.vicuna', | |
| type='xtuner.dataset.map_fns.template_map_fn_factory'), | |
| tokenizer=dict( | |
| padding_side='right', | |
| pretrained_model_name_or_path= | |
| '/mnt/program/wlx_data/ckpt/hf_models/fla-hub/rwkv7-1.5B-world/', | |
| trust_remote_code=True, | |
| type='transformers.AutoTokenizer.from_pretrained'), | |
| type='xtuner.dataset.AuroraDataset'), | |
| ], | |
| type='xtuner.dataset.ConcatDataset'), | |
| num_workers=16, | |
| sampler=dict(shuffle=True, type='mmengine.dataset.DefaultSampler')) | |
# Concatenation of nine AuroraDataset image/video-caption sources. Every
# entry shares the same tokenizer, image processor, prompt template and
# length limit; only the annotation file and image root differ. Each
# source's pre-tokenized text cache lives under
# <image_folder>/tokenized-by-rwkv7/tokenized_sandwich_data, so the
# offline folder is derived from the image folder instead of repeated.
_AURORA_LANG_ROOT = '/mnt/program/wlx_data/datasets/wchai/AuroraCap-trainset/language/'

train_dataset = dict(
    datasets=[
        dict(
            data_path=_AURORA_LANG_ROOT + _rel_data,
            dataset_map_fn='xtuner.dataset.map_fns.aurora_map_fn',
            image_folder=_AURORA_LANG_ROOT + _rel_images,
            image_processor=dict(
                crop_size=384,
                pretrained_model_name_or_path=
                'laion/CLIP-ViT-bigG-14-laion2B-39B-b160k',
                size=384,
                trust_remote_code=True,
                type='transformers.CLIPImageProcessor.from_pretrained'),
            max_length=4096,
            offline_processed_text_folder=_AURORA_LANG_ROOT + _rel_images +
            'tokenized-by-rwkv7/tokenized_sandwich_data',
            pad_image_to_square=True,
            template_map_fn=dict(
                template='xtuner.utils.PROMPT_TEMPLATE.vicuna',
                type='xtuner.dataset.map_fns.template_map_fn_factory'),
            tokenizer=dict(
                padding_side='right',
                pretrained_model_name_or_path=
                '/mnt/program/wlx_data/ckpt/hf_models/fla-hub/rwkv7-1.5B-world/',
                trust_remote_code=True,
                type='transformers.AutoTokenizer.from_pretrained'),
            type='xtuner.dataset.AuroraDataset')
        # (annotation jsonl, image root) per source, both relative to
        # _AURORA_LANG_ROOT; order matters and matches the original dump.
        for _rel_data, _rel_images in [
            ('allava_caption_vflan/filtered_allava_caption_vflan.jsonl',
             'allava_caption_vflan/data/ALLaVA-4V/'),
            ('allava_instruct_vflan/filtered_allava_instruct_vflan.jsonl',
             'allava_instruct_vflan/data/ALLaVA-4V/'),
            ('llava_mix/filtered_llava_mix.jsonl',
             'llava_mix/data/LLaVA-Instruct-150K/images/'),
            ('allava_caption_laion/filtered_allava_caption_laion.jsonl',
             'allava_caption_laion/data/ALLaVA-4V/'),
            ('allava_instruct_laion/filtered_allava_instruct_laion.jsonl',
             'allava_instruct_laion/data/ALLaVA-4V/'),
            ('m4/filtered_m4.jsonl',
             'm4/data/M4-Instruct-Data/images/'),
            ('evol/evol.jsonl',
             'evol/'),
            ('llavanext/filtered_llavanext.jsonl',
             'llavanext/data/LLaVA-NeXT-Data/images/'),
            ('miradata/filtered_miradata.jsonl',
             'miradata/data/MiraData/frames/'),
        ]
    ],
    type='xtuner.dataset.ConcatDataset')
# Visual encoder weights exported from the earlier vision-stage experiment
# (vision-aurora-rwkv7-0315, iter 36290) — presumably the CLIP-style image
# backbone used by the image_processor configs above; confirm against the
# model section of this config.
visual_encoder_name_or_path = '/mnt/program/wlx_data/experiments/vision-aurora-rwkv7-0315/iter_36290.hf/visual_encoder'
# 1.0 presumably keeps all visual tokens (no merging) — TODO confirm against
# the AuroraCap token-merge implementation.
visual_token_merge_ratio = 1.0
visualizer = None  # no mmengine visualizer backend configured
warmup_ratio = 0.03  # fraction of total training iters used for LR warmup
weight_decay = 0  # optimizer weight decay disabled
# Output directory for checkpoints and logs of this language-stage run.
work_dir = '/mnt/program/wlx_data/experiments/language-aurora-rwkv7-0317'