# File size: 5,752 Bytes
# commit 36c1e62
from dataclasses import dataclass, field
from typing import Optional
import transformers
@dataclass
class DataArguments:
    """Dataset and evaluation configuration for ObjectRelator runs.

    Groups image/annotation paths, segmentation-evaluation knobs, and the
    JSON paths consumed during (joint) training. All attributes are plain
    defaults so the class doubles as a CLI-parseable argument container.
    """

    lazy_preprocess: bool = False
    only_two_class: bool = False
    old_two_class: bool = False
    is_multimodal: bool = False
    # Root folder holding the MSCOCO train2014 images used at evaluation time.
    image_folder: Optional[str] = '/home/emzhang/data/segmentation/refer_seg/images/mscoco/images/train2014'
    # Mask2Former model config consumed by the segmentation head.
    mask_config: Optional[str] = "./objectrelator/mask_config/maskformer2_swin_base_384_bs16_50ep.yaml"
    image_aspect_ratio: str = 'square'
    image_grid_pinpoints: Optional[str] = None
    region_mask_type: Optional[str] = None
    # Annotation JSON produced by the dataset-building script.
    json_path: str = '/home/emzhang/code/LLaVA/datasets/refcoco/refcoco_val.json'
    # JSON file describing how to split tasks (empty string disables splitting).
    split_path: str = ''
    split: str = 'val'
    model_path: str = '/home/emzhang/code/llava_zem/checkpoints/SEG_class_refcoco_after_fixbug'
    model_map_name: str = 'ObjectRelator'
    SEG_norm: bool = False
    SEG_proj: bool = True
    criterion_type: Optional[str] = "concat_seg"
    matcher_type: Optional[str] = "wo_class"
    llm_pos: Optional[str] = "none"
    ln_2048: bool = False
    version_val: str = 'opt-iml-1.3b'
    seg_idx_back: bool = False
    segmentation: bool = True
    eval_batch_size: int = 1
    dataloader_num_workers_val: int = 4
    # Score threshold for keeping predicted masks.
    thr: float = 0.5
    topk: int = 1
    fuse_score: bool = False
    seg_task: Optional[str] = "region"
    seg_last: bool = True
    # Chunked-evaluation controls (process chunk `chunk_idx` of `num_chunks`).
    num_chunks: int = 1
    chunk_idx: int = 0
    # Either 'multi-condition' or a single-condition mode.
    condition: str = 'multi-condition'
    # Optionally restrict processing to one selected frame id.
    select_id: Optional[int] = None
    # ---- training-time inputs ----
    refcoco_image_folder: Optional[str] = "/path/to/refer_seg/images/mscoco/images/train2014"
    image_first: bool = True
    instruction_version: str = 'v1'
    instance_json_path: str = '/path/to/instruction_segmentation_train.json'
    lvis_json_path: str = '/path/to/lvis_instance_train.json'
    lvis_categories_path: str = '/path/to/lvis_instance_categories.json'
    # JSON paths emitted by the dataset-building script.
    region_json_path: str = '/path/to/visual_prompt_segmentation_train.json'
    panoptic_json_path: str = "/path/to/coco"
    ref_coco_path: str = '/path/to/refcoco/refcoco_train.json'
    ref_coco_plus_path: str = '/path/to/refcoco+/refcoco+_train.json'
    ref_coco_g_path: str = '/path/to/refcocog/refcocog_train.json'
    mmconv_path: str = '/path/to/llava_1_5'
    # '||'-separated sampling ratio across the datasets above.
    data_ratio: str = '1||1||1||1'
    # When > 0, presumably caps the dataset length — TODO confirm against the loader.
    fix_dataset_len: int = 0
    # JSON paths for joint ego<->exo training.
    joint_json_ego2exo: str = '/path/to/joint_ego_exo.json'
    joint_json_exo2ego: str = '/path/to/joint_exo_ego.json'
@dataclass
class ModelArguments:
    """Model and vision-backbone configuration.

    Covers the base LLM checkpoint, the multimodal projector, and the
    optional SAM/Swin/DINO vision components.
    """

    model_name_or_path: Optional[str] = "facebook/opt-125m"
    version: Optional[str] = "v0"
    freeze_backbone: bool = False
    train_backbone: bool = False
    tune_mm_mlp_adapter: bool = False
    vision_tower: Optional[str] = None
    # -1 selects the last hidden layer of the vision tower.
    mm_vision_select_layer: Optional[int] = -1
    pretrain_mm_mlp_adapter: Optional[str] = None
    mm_use_im_start_end: bool = False
    mm_use_im_patch_token: bool = True
    mm_vision_select_feature: Optional[str] = "patch"
    with_norm: bool = True
    with_layernorm: bool = False
    skip_init_vision: bool = False
    with_sam: bool = False
    with_swin: bool = False
    with_teacher: bool = False
    swin_type: Optional[str] = "base"
    projector_outdim: Optional[int] = 2048
    mm_projector_type: Optional[str] = "swin_conv"
    model_version: Optional[str] = "v1"
    load_mask2former: bool = True
    dino_path: Optional[str] = None
@dataclass
class TrainingArguments(transformers.TrainingArguments):
    """HuggingFace TrainingArguments extended with quantization, LoRA, and
    pipeline flags used by this project.

    Fields carrying ``metadata`` keep the ``field(...)`` form so their help
    strings surface through ``HfArgumentParser``.
    """

    cache_dir: Optional[str] = None
    optim: str = "adamw_torch"
    remove_unused_columns: bool = False
    freeze_mm_mlp_adapter: bool = False
    mpt_attn_impl: Optional[str] = "triton"
    model_max_length: int = field(
        default=512,
        metadata={
            "help":
            "Maximum sequence length. Sequences will be right padded (and possibly truncated)."
        },
    )
    double_quant: bool = field(
        default=True,
        metadata={"help": "Compress the quantization statistics through double quantization."},
    )
    quant_type: str = field(
        default="nf4",
        metadata={"help": "Quantization data type to use. Should be one of `fp4` or `nf4`."},
    )
    bits: int = field(
        default=16,
        metadata={"help": "How many bits to use."},
    )
    # ---- LoRA hyperparameters ----
    lora_enable: bool = False
    lora_r: int = 64
    lora_alpha: int = 16
    lora_dropout: float = 0.05
    lora_weight_path: str = ""
    lora_bias: str = "none"
    dataloader_drop_last: bool = True
    # Set to True to train on the HANDAL dataset.
    is_handal: bool = False
    # Set to True to enable joint (ego<->exo) training.
    joint_training: bool = False
    # Set to True for the first stage of training.
    first_stage: bool = False
    # Checkpoint loaded before training starts.
    pretrained_model_path: str = "/path/to/pretrained_model"
    output_dir: str = "/path/to/output_dir"