# ImageNet-1K training split served as in-context-learning (ICL) pairs.
# NOTE(review): absolute cluster paths — assumes the /mnt/lustre and /mnt/cache
# mounts are available at runtime; verify on the target machine.
IMAGENET1K_TRAIN = dict(
    type='ImageNet1kDatasetTrain',
    filename='/mnt/lustre/share_data/taiyan/dataset/imagenet1k/train900_pairs.jsonl',
    image_folder='/mnt/lustre/share_data/taiyan/dataset/ImageNet-1K',
    template_file='/mnt/cache/taiyan/unify_mllm/config/_base_/dataset/template/ICL.json',
)
# Named dataset variants for the ImageNet-1K ICL training set.
# Currently a single variant, 'imagenet1k_train'; kept as a mapping so more
# variants (e.g. different templates or splits) can be registered later.
DEFAULT_TRAIN_IMAGENET1K_VARIANT = dict(
    imagenet1k_train=dict(
        type='ImageNet1kDatasetTrain',
        filename='/mnt/lustre/share_data/taiyan/dataset/imagenet1k/train900_pairs.jsonl',
        image_folder='/mnt/lustre/share_data/taiyan/dataset/ImageNet-1K',
        template_file='/mnt/cache/taiyan/unify_mllm/config/_base_/dataset/template/ICL.json',
    ),
)
# Model configuration for a LLaVA-style multimodal LM.
# NOTE(review): paths point at shared cluster checkpoints (LLaVA pretrain +
# CLIP ViT-L/14 vision tower) — confirm they exist in the deployment env.
model_args = dict(
    type='llava',
    version='v1',
    cache_dir=None,
    # Base LM checkpoint (LLaVA pretrain, step 44000).
    model_name_or_path='/mnt/lustre/share_data/xiechi/misc/to_weichen/llava_pretrain_final19/checkpoint-44000/',
    vision_tower='/mnt/lustre/share_data/chenkeqin/VG/ckpt/openai/clip-vit-large-patch14',
    pretrain_mm_mlp_adapter=None,
    # Use the penultimate vision-tower layer's features.
    mm_vision_select_layer=-2,
    model_max_length=30000,
    # All freeze/tune switches off: full-model fine-tuning.
    freeze_backbone=False,
    tune_mm_mlp_adapter=False,
    freeze_mm_mlp_adapter=False,
    freeze_mm_projector=False,
    is_multimodal=True,
    sep_image_conv_front=False,
    # Number of visual tokens injected per image.
    image_token_len=256,
    mm_use_im_start_end=True,
    target_processor=dict(boxes=dict(type='PlainBoxFormatter')),
    # Registered processors for conversation, target (boxes), text and image.
    process_func_args=dict(
        conv=dict(type='LLavaConvProcessV1'),
        target=dict(type='BoxFormatProcess'),
        text=dict(type='LlavaTextProcessV2'),
        image=dict(type='LlavaImageProcessorV1'),
    ),
    conv_args=dict(
        # Candidate conversation templates; presumably selected per sample.
        conv_template=[
            'hypnotized_v1.0', 'hypnotized_v1.1', 'hypnotized_ans_v1.0',
            'vicuna_v1.1', 'causal_v1.0', 'final_v1.0',
        ],
        transforms=dict(type='Expand2square'),
        tokenize_kwargs=dict(truncation_size=2048),
    ),
    # Propagate pad/bos/eos token ids into generation kwargs.
    gen_kwargs_set_pad_token_id=True,
    gen_kwargs_set_bos_token_id=True,
    gen_kwargs_set_eos_token_id=True,
)
# HuggingFace-style training arguments.
# Mixed precision: fp16 enabled, bf16/tf32 disabled; FSDP full-shard with
# auto-wrap at LlamaDecoderLayer granularity.
training_args = dict(
    output_dir='/mnt/cache/taiyan/unify_mllm/checkpoints/2way_weight',
    overwrite_output_dir=True,
    report_to='none',
    seed=42,
    remove_unused_columns=False,
    do_train=True,
    # Effective batch size = 1 per device (no gradient accumulation).
    per_device_train_batch_size=1,
    gradient_accumulation_steps=1,
    num_train_epochs=50,
    learning_rate=2e-05,
    lr_scheduler_type='cosine',
    weight_decay=0.0,
    warmup_ratio=0.03,
    evaluation_strategy='no',
    tf32=False,
    bf16=False,
    # Trade compute for memory.
    gradient_checkpointing=True,
    fsdp='full_shard auto_wrap',
    fsdp_transformer_layer_cls_to_wrap='LlamaDecoderLayer',
    logging_steps=10,
    save_strategy='steps',
    save_steps=500,
    do_eval=False,
    do_predict=False,
    predict_with_generate=True,
    per_device_eval_batch_size=8,
    dataloader_num_workers=4,
    fp16=True,
)
# Data pipeline configuration: ImageNet-1K ICL training set only
# (no validation/test splits), 8-shot in-context learning.
data_args = dict(
    train=dict(
        type='ImageNet1kDatasetTrain',
        filename='/mnt/lustre/share_data/taiyan/dataset/imagenet1k/train900_pairs.jsonl',
        image_folder='/mnt/lustre/share_data/taiyan/dataset/ImageNet-1K',
        template_file='/mnt/cache/taiyan/unify_mllm/config/_base_/dataset/template/ICL.json',
        # Sample-construction policy; meaning defined by the dataset class.
        policy='policy_2way_weight',
    ),
    validation=None,
    test=None,
    compute_metric=None,
    collator_kwargs=dict(padding=True, max_length=1024),
    gen_kwargs=dict(max_new_tokens=1024, num_beams=1),
    use_icl=True,
    # Number of in-context examples per query.
    shot=8,
)