# train_pose2rot_v7_ddp_b4.yaml — v6 recipe (memory ablation + tvar) at batch size 4 per GPU on 2-GPU DDP, for 3.6x throughput
# Human-readable label for this configuration (how it is consumed is not visible here).
name: Pose2Rot training

# Runtime settings: compute device, seed value, and debug switch.
runtime:
  device: cuda
  # presumably the random seed used for reproducibility — confirm against the trainer
  seed: 42
  debug: false

# Output locations. The path is relative (starts with ./); what it is resolved
# against is not visible here.
output:
  checkpoint_root: ./checkpoints/pose2rot


# Experiment identifier.
# NOTE(review): the file header mentions "v7" / "v6 recipe" while this id says
# "v10" — confirm which version label is current.
experiment:
  exp: exp_pose2rot_v10_split_heldout

# Model definition: `target` is a dotted path to the model class and `params`
# holds its hyperparameters (presumably passed to the constructor — confirm
# against the config loader).
model:
  target: models.v2.pose2rot.model.Pose2RotMemoryRestModel
  params:
    q_dim: 256
    # Layer counts for each sub-module.
    rest_layers: 4
    pose_layers: 4
    memory_layers: 4
    decoder_layers: 10
    num_heads: 8
    joint_embed_dim: 768
    temporal_window: 2
    temporal_dropout: 0.1
    decoder_cond_mode: add # allowed values: add | concat
    # FiLM-style conditioning switches (all enabled in this recipe).
    pose_rest_film: true
    memory_rest_film: true
    decoder_rest_film: true
    pose_use_graph: true
    use_grad_checkpoint: false
    decoder_use_cross_layers: 0   # MEMORY ABLATION: no decoder cross-attention into the memory bank (kills species-constant leakage)

# Training schedule, optimizer settings, loss types, and loss-term weights.
train:
  batch_size: 4   # per-GPU batch; DDP global batch = 4/gpu x 2 gpu = 8
  epochs: 60
  grad_accum_steps: 1
  lr: 0.0002   # DDP 2-gpu global batch 8 at the proven-safe lr 2e-4 (tests whether batch-doubling breaks anti-collapse)
  warmup_steps: 500   # linear LR warmup (codex: tame Adam startup at large scaled LR). NOTE(review): this comment previously said 0->8e-4, but lr above is 2e-4 — confirm the warmup target
  max_ckpt: 100
  num_workers_train: 6   # 6/proc x 2 proc = 12 of 16 cores
  test_every: 1
  pretrain_ckpt: null

  # Loss function used for each term (rotation, velocity, acceleration).
  loss:
    rot_loss_type: smooth_l1
    vel_loss_type: smooth_l1
    acc_loss_type: smooth_l1

  # Per-term loss weights, including the FK weight ramp.
  weight:
    root_wt: 0.1
    fk_wt: 10.0   # FK ramp END=10 (user's choice, following MoCapAnything; gentler than v8b's 30; fk gradient ~0.33 << grad_clip 1.0, so it should no longer diverge metastably)
    fk_wt_start: 0.0   # start from 0 (pure anti-collapse early phase; lets tvar break the collapse normally)
    fk_ramp_start_epoch: 5
    fk_ramp_end_epoch: 15    # linear 0->10 over epochs 5-15, constant 10 afterwards
    vel_wt: 1.0
    acc_wt: 1.0   # acc term added at the user's request (2nd-order temporal smoothness); magnitude at random init ~0.0095
    rot_wt: 1.0
    tvar_wt: 2.0   # demeaned-temporal supervision (force motion-tracking, anti-collapse)

  vis_every: 5
  weight_decay: 0.0

# Evaluation loader settings: batch size and worker count.
eval:
  batch_size: 1
  num_workers: 2

# Dataset settings: sequence length, source directories, split file, and
# memory pickle paths. All paths are relative; the base directory is not
# visible here.
data:
  seq_len: 48
  bvh_dir: datasets/zoo1030/bvh
  cache_scale: true
  # Empty list; presumably means "no species restriction" — confirm against the data loader.
  limit_species_debug: []
  mmap: true
  split_json: datasets/zoo1030/test_split_seen_rare_unseen.json
  # NOTE(review): the train and test memory pickle paths below are identical —
  # confirm that sharing one memory file across both is intended.
  train_memory_pkl_path: datasets/zoo1030/cache/species_fps_memory_yAll/fps_select_by_rot_32.pkl
  test_memory_pkl_path: datasets/zoo1030/cache/species_fps_memory_yAll/fps_select_by_rot_32.pkl