File size: 5,674 Bytes
fe6c2e4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
from dataclasses import dataclass, field
from typing import Optional
import transformers


@dataclass
class DataArguments:
    """Dataset, path and evaluation options declared as dataclass fields.

    Every field carries a default, so ``DataArguments()`` can be built with no
    arguments. Several path defaults are machine-specific absolute paths or
    ``/path/to/...`` placeholders; override them for your own environment.
    """

    lazy_preprocess: bool = False
    only_two_class: bool = False
    old_two_class: bool = False
    is_multimodal: bool = False

    # Image directory.
    image_folder: Optional[str] = "/home/emzhang/data/segmentation/refer_seg/images/mscoco/images/train2014"
    mask_config: Optional[str] = "./objectrelator/mask_config/maskformer2_swin_base_384_bs16_50ep.yaml"
    image_aspect_ratio: str = "square"
    image_grid_pinpoints: Optional[str] = None
    region_mask_type: Optional[str] = None

    # JSON file produced by the building script.
    json_path: str = "/home/emzhang/code/LLaVA/datasets/refcoco/refcoco_val.json"
    # JSON file used to split takes.
    split_path: str = ""
    split: str = "val"
    model_path: str = "/home/emzhang/code/llava_zem/checkpoints/SEG_class_refcoco_after_fixbug"
    model_map_name: str = "ObjectRelator"

    SEG_norm: bool = False
    SEG_proj: bool = True
    criterion_type: Optional[str] = "concat_seg"
    matcher_type: Optional[str] = "wo_class"
    llm_pos: Optional[str] = "none"
    ln_2048: bool = False
    version_val: str = "opt-iml-1.3b"
    seg_idx_back: bool = False
    segmentation: bool = True

    eval_batch_size: int = 1
    dataloader_num_workers_val: int = 4
    thr: float = 0.5
    topk: int = 1
    fuse_score: bool = False
    seg_task: Optional[str] = "region"
    seg_last: bool = True
    num_chunks: int = 1
    chunk_idx: int = 0

    # Condition mode: multi-condition / single-condition.
    condition: str = "multi-condition"

    # ---- Options used for training ----
    refcoco_image_folder: Optional[str] = "/path/to/refer_seg/images/mscoco/images/train2014"
    image_first: bool = True
    instruction_version: str = "v1"
    instance_json_path: str = "/path/to/instruction_segmentation_train.json"
    lvis_json_path: str = "/path/to/lvis_instance_train.json"
    lvis_categories_path: str = "/path/to/lvis_instance_categories.json"
    # JSON file produced by the building script.
    region_json_path: str = "/path/to/visual_prompt_segmentation_train.json"
    panoptic_json_path: str = "/path/to/coco"
    ref_coco_path: str = "/path/to/refcoco/refcoco_train.json"
    ref_coco_plus_path: str = "/path/to/refcoco+/refcoco+_train.json"
    ref_coco_g_path: str = "/path/to/refcocog/refcocog_train.json"
    mmconv_path: str = "/path/to/llava_1_5"
    data_ratio: str = "1||1||1||1"
    fix_dataset_len: int = 0

    # JSON files for joint training.
    joint_json_ego2exo: str = "/path/to/joint_ego_exo.json"
    joint_json_exo2ego: str = "/path/to/joint_exo_ego.json"
  

@dataclass
class ModelArguments:
    """Model-selection and architecture switches declared as dataclass fields.

    Every field has a default, so ``ModelArguments()`` needs no arguments.
    """

    model_name_or_path: Optional[str] = "facebook/opt-125m"
    version: Optional[str] = "v0"
    freeze_backbone: bool = False
    train_backbone: bool = False
    tune_mm_mlp_adapter: bool = False
    vision_tower: Optional[str] = None
    # -1 defaults to the last layer.
    mm_vision_select_layer: Optional[int] = -1
    pretrain_mm_mlp_adapter: Optional[str] = None
    mm_use_im_start_end: bool = False
    mm_use_im_patch_token: bool = True
    mm_vision_select_feature: Optional[str] = "patch"
    with_norm: bool = True
    with_layernorm: bool = False
    skip_init_vision: bool = False
    with_sam: bool = False
    with_swin: bool = False
    with_teacher: bool = False
    swin_type: Optional[str] = "base"
    projector_outdim: Optional[int] = 2048
    mm_projector_type: Optional[str] = "swin_conv"
    model_version: Optional[str] = "v1"
    load_mask2former: bool = True
    dino_path: Optional[str] = None


@dataclass
class TrainingArguments(transformers.TrainingArguments):
    """Training options layered on top of ``transformers.TrainingArguments``.

    Adds project-specific fields (sequence length, quantization, LoRA and
    training-mode switches) and re-declares a few names with new defaults.
    NOTE(review): ``optim``, ``remove_unused_columns``, ``dataloader_drop_last``
    and ``output_dir`` appear to shadow fields of the base class -- confirm
    against the installed ``transformers`` version.
    """

    cache_dir: Optional[str] = None
    optim: str = "adamw_torch"
    remove_unused_columns: bool = False
    freeze_mm_mlp_adapter: bool = False
    mpt_attn_impl: Optional[str] = "triton"

    # Fields below keep ``field(...)`` because they carry help metadata.
    model_max_length: int = field(
        default=512,
        metadata={
            "help": "Maximum sequence length. Sequences will be right padded (and possibly truncated)."
        },
    )
    double_quant: bool = field(
        default=True,
        metadata={
            "help": "Compress the quantization statistics through double quantization."
        },
    )
    quant_type: str = field(
        default="nf4",
        metadata={
            "help": "Quantization data type to use. Should be one of `fp4` or `nf4`."
        },
    )
    bits: int = field(
        default=16,
        metadata={"help": "How many bits to use."},
    )

    # LoRA-related options.
    lora_enable: bool = False
    lora_r: int = 64
    lora_alpha: int = 16
    lora_dropout: float = 0.05
    lora_weight_path: str = ""
    lora_bias: str = "none"

    dataloader_drop_last: bool = True

    # Set to True to use the handal dataset.
    is_handal: bool = False
    # Set to True to use joint training.
    joint_training: bool = False
    # Set to True to use the first stage of training.
    first_stage: bool = False
    # Path to the pretrained model.
    pretrained_model_path: str = "/path/to/pretrained_model"
    output_dir: str = "/path/to/output_dir"