# ATCTrack-VLM/experiments/atctrack/atctrack_base.yaml
# SunXiang2025: Upload ATCTrack-VLM code and selected checkpoints
# Commit 25986db (verified)
# Data settings: frame-sampling interval, normalization statistics, crop
# parameters for search and template images, and the training dataset mix.
# NOTE(review): SEARCH and TEMPLATE repeat the same key names, so each set
# must live in its own sub-mapping; TRAIN here holds dataset options only and
# is assumed to be DATA.TRAIN (a separate TRAIN section with optimizer
# settings exists elsewhere in this file) -- confirm against the config loader.
DATA:
  MAX_SAMPLE_INTERVAL: 400
  # Three entries, matching STD below. The values equal the commonly used
  # ImageNet RGB statistics -- presumably per-channel input normalization;
  # confirm against the data loader.
  MEAN:
    - 0.485
    - 0.456
    - 0.406
  # Search-region crop settings.
  SEARCH:
    CENTER_JITTER: 3.5
    FACTOR: 4.0
    SCALE_JITTER: 0.5
    SIZE: 256
    NUMBER: 4  # 6
  STD:
    - 0.229
    - 0.224
    - 0.225
  # Template crop settings; both jitter values are 0 here.
  TEMPLATE:
    CENTER_JITTER: 0
    FACTOR: 2.0
    SCALE_JITTER: 0
    SIZE: 128
    NUMBER: 2
  TRAIN:
    # Only LASOT is active; the remaining datasets are commented out.
    DATASETS_NAME:
      - LASOT
      # - GOT10K_vottrain
      # - TRACKINGNET
      # - VastTrack
      # - TNL2K_train
      # - RefCOCO14  # RefCOCO14,COCO17
      # - OTB99_train
    # One active ratio for the one active dataset. Entries presumably pair
    # with DATASETS_NAME by position -- keep both lists the same length when
    # re-enabling datasets (TODO confirm against the sampler).
    DATASETS_RATIO:
      # - 6
      # - 6
      # - 6
      # - 6
      # - 6
      # - 6
      - 1
    SAMPLE_PER_EPOCH: 25000
# Model settings: pretrained weights, the target-state language-model branch
# (with LoRA options), and the tracker backbone and head.
# NOTE(review): BACKBONE and HEAD both define TYPE, so each must be its own
# sub-mapping. HIDDEN_DIM is placed at MODEL level here; confirm it is not
# read as HEAD.HIDDEN_DIM by the model builder.
MODEL:
  PRETRAIN_FILE: "fast_itpn_base_clipl_e1600.pt"  # for backbone
  PRETRAINED_PATH: "checkpoint/ATCTrack_b.pth.tar"
  TARGET_STATE:
    ENABLE: true
    # Relative path; presumably resolved against the working directory of the
    # launching script -- verify before running from another location.
    MODEL_PATH: "../Qwen/Qwen3.5-4B"
    TOKEN: "<TARGET_STATE>"
    FREEZE_QWEN: true
    TRAIN_TOKEN_EMBEDDING: true
    USE_LORA: true
    # LORA_ALPHA / LORA_R = 2; if these feed a PEFT-style LoRA config this is
    # the adapter scaling factor (TODO confirm how the values are consumed).
    LORA_R: 8
    LORA_ALPHA: 16
    LORA_DROPOUT: 0.05
    # Module-name suffixes selected for LoRA adaptation.
    LORA_TARGET_MODULES:
      - in_proj_qkv
      - out_proj
      - in_proj_z
      - in_proj_b
      - in_proj_a
      - q_proj
      - k_proj
      - v_proj
      - o_proj
      - gate_proj
      - up_proj
      - down_proj
  BACKBONE:
    TYPE: itpn_base
    STRIDE: 16
  HEAD:
    TYPE: CENTER
    NUM_CHANNELS: 256
  HIDDEN_DIM: 512
# Optimization settings: loss weights, learning-rate schedule, optimizer and
# which part of the model is trained.
# NOTE(review): SCHEDULER.TYPE and the trailing TYPE cannot share a mapping,
# so the trailing TYPE is a direct child of TRAIN. DECAY_RATE is assumed to
# belong to SCHEDULER, and WEIGHT_DECAY / AMP / FIX_BN to TRAIN -- confirm
# against the config loader.
TRAIN:
  BACKBONE_MULTIPLIER: 0.1
  DROP_PATH_RATE: 0.1
  BATCH_SIZE: 1
  EPOCH: 180
  GIOU_WEIGHT: 2.0
  L1_WEIGHT: 5.0
  GRAD_CLIP_NORM: 0.1
  LR: 0.0001
  LR_DROP_EPOCH: 144  # 4/5 (0.8) of EPOCH: 144 / 180
  NUM_WORKER: 8
  OPTIMIZER: ADAMW
  PRINT_INTERVAL: 50
  SCHEDULER:
    TYPE: step
    DECAY_RATE: 0.1
  WEIGHT_DECAY: 0.0001
  # Lowercase boolean, matching the other booleans in this file.
  AMP: false
  FIX_BN: true
  TYPE: target_state  # train target-state projector/gate and tracker head
# Inference settings. The factor/size values equal the FACTOR and SIZE values
# listed under SEARCH and TEMPLATE in the data section of this file, and
# NUM_TEMPLATES equals the template NUMBER given there.
TEST:
  # NOTE(review): 240 is larger than the training EPOCH value (180) in this
  # file. If this selects a checkpoint by epoch number, training with this
  # config would not produce it -- confirm which checkpoint is intended.
  EPOCH: 240
  SEARCH_FACTOR: 4.0
  SEARCH_SIZE: 256
  TEMPLATE_FACTOR: 2.0
  TEMPLATE_SIZE: 128
  WINDOW: true
  NUM_TEMPLATES: 2