# Source listing: pose2rot/config_v10_split_heldout.yaml — uploaded with huggingface_hub (Tevior), commit 1e44786 (verified), 2.36 kB.
# Pose2Rot v10 split-heldout config. Header inherited from train_pose2rot_v7_ddp_b4.yaml:
# v6 recipe (memabl+tvar) at batch 4, DDP on 2 GPUs, for 3.6x throughput.
# Human-readable label for this configuration.
name: 'Pose2Rot training'
# Runtime settings: compute device, random seed and debug switch.
runtime:
  device: cuda
  seed: 42
  debug: false
# Output locations; checkpoint_root is a path relative to the working directory.
output:
  checkpoint_root: ./checkpoints/pose2rot
# Experiment identifier for this run (v10, split with held-out data).
experiment:
  exp: exp_pose2rot_v10_split_heldout
# Model definition: `target` is the dotted path of the model class and
# `params` holds its hyperparameters (presumably forwarded to that class — confirm in the loader).
model:
  target: models.v2.pose2rot.model.Pose2RotMemoryRestModel
  params:
    q_dim: 256
    # Layer counts for each sub-module.
    rest_layers: 4
    pose_layers: 4
    memory_layers: 4
    decoder_layers: 10
    num_heads: 8
    joint_embed_dim: 768
    temporal_window: 2
    temporal_dropout: 0.1
    decoder_cond_mode: add  # add | concat
    pose_rest_film: true
    memory_rest_film: true
    decoder_rest_film: true
    pose_use_graph: true
    use_grad_checkpoint: false
    # MEMORY ABLATION: no decoder cross-attn into memory bank
    # (kill species-constant leakage).
    decoder_use_cross_layers: 0
# Training schedule, optimisation settings and loss configuration.
# NOTE(review): nesting was reconstructed from a flattened source; `loss` is placed
# under `train` because `vis_every` / `weight_decay` follow it — confirm against the loader.
train:
  batch_size: 4  # DDP global batch = 4/gpu x 2 gpu = 8
  epochs: 60
  grad_accum_steps: 1
  # DDP 2-gpu global batch 8 at the proven-safe lr 2e-4
  # (tests whether batch-doubling breaks anti-collapse).
  lr: 0.0002
  # Linear LR warmup; presumably ramps 0 -> lr (2e-4 here). An older note said
  # 0 -> 8e-4, which does not match `lr` above. Purpose: tame Adam startup.
  warmup_steps: 500
  max_ckpt: 100
  num_workers_train: 6  # 6/proc x 2 proc = 12 of 16 cores
  test_every: 1
  pretrain_ckpt: null
  loss:
    rot_loss_type: smooth_l1
    vel_loss_type: smooth_l1
    acc_loss_type: smooth_l1
    weight:
      root_wt: 0.1
      # FK ramp end value = 10 (user's choice, following MoCapAnything; gentler than
      # v8b's 30; FK gradient ~0.33 << grad_clip 1.0, so no more metastable divergence).
      fk_wt: 10.0
      # Start from 0 (pure anti-collapse early phase; lets tvar break collapse normally).
      fk_wt_start: 0.0
      fk_ramp_start_epoch: 5
      # Linear 0 -> 10 over epochs 5-15, then constant at 10.
      fk_ramp_end_epoch: 15
      vel_wt: 1.0
      # Acceleration term added at user request (2nd-order temporal smoothness);
      # magnitude at random initialisation is ~0.0095.
      acc_wt: 1.0
      rot_wt: 1.0
      # Demeaned-temporal supervision (force motion-tracking, anti-collapse).
      tvar_wt: 2.0
  vis_every: 5
  weight_decay: 0.0
# Evaluation data-loading settings (kept separate from train.batch_size).
eval:
  batch_size: 1
  num_workers: 2
# Dataset settings: sequence length, BVH source directory, split file and memory caches.
data:
  seq_len: 48
  bvh_dir: datasets/zoo1030/bvh
  cache_scale: true
  limit_species_debug: []  # empty list = no debug restriction configured
  mmap: true
  split_json: datasets/zoo1030/test_split_seen_rare_unseen.json
  # Train and test currently point at the same memory pickle.
  train_memory_pkl_path: datasets/zoo1030/cache/species_fps_memory_yAll/fps_select_by_rot_32.pkl
  test_memory_pkl_path: datasets/zoo1030/cache/species_fps_memory_yAll/fps_select_by_rot_32.pkl