# litwell's picture
# Upload models/src/training/params.py with huggingface_hub
# fa5b6e0 verified
from dataclasses import dataclass, field
from typing import Optional
from transformers import TrainingArguments
@dataclass
class ModelArguments:
    """Model-related command-line arguments.

    Attributes are documented with ``#`` comments next to each field.
    """

    # Identifier string of the model; may be overridden or set to None.
    model_id: Optional[str] = "Qwen/Qwen2-VL-7B-Instruct"
@dataclass
class TrainingArguments(TrainingArguments):
    """Training options layered on top of the imported ``TrainingArguments``.

    NOTE: this subclass reuses the name of its base class. After this
    definition, the module-level name ``TrainingArguments`` refers to this
    subclass, not to the class imported at the top of the file.

    Fields are documented through their ``help`` metadata where present and
    through ``#`` comments otherwise.
    """

    cache_dir: Optional[str] = field(default=None)

    # Optimizer defaults declared by this subclass.
    optim: str = field(default="adamw_torch")
    adam_beta1: float = field(default=0.9)
    adam_beta2: float = field(default=0.999)
    adam_epsilon: float = field(default=1e-8)

    # Flags controlling which parts of the model are frozen / tuned.
    # NOTE(review): exact semantics are defined by the training script that
    # consumes these flags, not by this file -- confirm there.
    freeze_vision_tower: bool = field(default=False)
    freeze_llm: bool = field(default=False)
    tune_merger: bool = field(default=False)
    disable_flash_attn2: bool = field(default=False)

    max_seq_length: int = field(
        default=32768,  # This is the default value of the qwen2-vl model
        metadata={
            "help":
                "Maximum sequence length. Sequences will be right padded (and possibly truncated)."
        },
    )

    # Quantization options.
    double_quant: bool = field(
        default=True,
        metadata={"help": "Compress the quantization statistics through double quantization."},
    )
    quant_type: str = field(
        default="nf4",
        metadata={"help": "Quantization data type to use. Should be one of `fp4` or `nf4`."},
    )
    bits: int = field(
        default=16,
        metadata={"help": "How many bits to use."},
    )

    # LoRA options.
    lora_enable: bool = False
    vision_lora: bool = False
    use_dora: bool = False
    lora_rank: int = 64
    lora_alpha: int = 16
    lora_dropout: float = 0.05
    lora_weight_path: str = ""
    lora_bias: str = "none"

    # Optional separate learning rates; None means "not set".
    vision_lr: Optional[float] = None
    merger_lr: Optional[float] = None

    # Annotated Optional because the default is None (the original annotation
    # was plain ``str``, which did not match the default value).
    lora_namespan_exclude: Optional[str] = field(
        default=None,
        metadata={"help": "List of namespan to exclude for LoRA"},
    )
    num_lora_modules: int = -1
    use_liger: bool = True
@dataclass
class DataArguments:
    """Data-related command-line arguments.

    Fields are documented through their ``help`` metadata where present and
    through ``#`` comments otherwise.
    """

    # Annotated Optional because the default is None (the original annotation
    # was plain ``str``, which did not match the default value).
    data_path: Optional[str] = field(
        default=None, metadata={"help": "Path to the training data."}
    )
    lazy_preprocess: bool = False
    image_folder: Optional[str] = field(default=None)

    # Pixel-count bounds for images and videos.
    # NOTE(review): each default is a multiple of 28 * 28 (4, 16384, 128 and
    # 768 times respectively) -- presumably aligned to the model's patch
    # size; confirm against the consumer of these values.
    image_min_pixels: Optional[int] = field(default=3136)
    image_max_pixels: Optional[int] = field(default=12845056)
    video_min_pixels: Optional[int] = field(default=100352)
    video_max_pixels: Optional[int] = field(default=602112)

    # NOTE(review): presumably the video frame sampling rate -- confirm.
    fps: float = 1.0