Instructions to use ChengKai2025/world2hand with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Diffusers
How to use ChengKai2025/world2hand with Diffusers:
```bash
pip install -U diffusers transformers accelerate
```
```python
import torch
from diffusers import DiffusionPipeline

# switch to "mps" for apple devices
pipe = DiffusionPipeline.from_pretrained("ChengKai2025/world2hand", dtype=torch.bfloat16, device_map="cuda")

prompt = "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k"
image = pipe(prompt).images[0]
```
- Notebooks
- Google Colab
- Kaggle
Add projector config: loss_l2_4ds_resume40k
Browse files
projector/loss_l2_4ds_resume40k/config.yaml
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
config: /mnt/slurm_home/ckjin/vdm/world2hand_metrics/world2hand/config/projector/presets/cached_4ds_5ds37k/loss_ablation/loss_l2_4ds_resume40k.yaml
|
| 2 |
+
model_type: td_xattn_heatmap
|
| 3 |
+
json_path: /mnt/slurm_home/ckjin/vdm/world2hand_metrics/world2hand/data_json/xray/cached5ds37k_arctic_train.json
|
| 4 |
+
json_path_weights:
|
| 5 |
+
- 2145
|
| 6 |
+
- 1086
|
| 7 |
+
- 910
|
| 8 |
+
- 801
|
| 9 |
+
eval_json:
|
| 10 |
+
- /mnt/slurm_home/ckjin/vdm/world2hand_metrics/world2hand/data_json/xray/cached5ds37k_arctic_test.json
|
| 11 |
+
- /mnt/slurm_home/ckjin/vdm/world2hand_metrics/world2hand/data_json/xray/cached5ds37k_hot3d_test.json
|
| 12 |
+
- /mnt/slurm_home/ckjin/vdm/world2hand_metrics/world2hand/data_json/xray/cached5ds37k_hoi4d_test.json
|
| 13 |
+
- /mnt/slurm_home/ckjin/vdm/world2hand_metrics/world2hand/data_json/xray/cached5ds37k_h2o_test.json
|
| 14 |
+
vace_features_dir: /mnt/slurm_home/ckjin/vdm/checkpoint_mixed_5ds_37k/checkpoint-37000/features/L15_s34
|
| 15 |
+
output_dir: experiments/projector/cached_4ds_5ds37k/loss_ablation/loss_l2_4ds_resume40k
|
| 16 |
+
output_under_vace_ckpt: false
|
| 17 |
+
projector_output_root: experiments/projector
|
| 18 |
+
eval_output_dir: experiments/projector/cached_4ds_5ds37k/loss_ablation/loss_l2_4ds_resume40k
|
| 19 |
+
viz_default_k: false
|
| 20 |
+
checkpoint: /mnt/slurm_home/ckjin/vdm/world2hand_metrics/world2hand/experiments/projector/cached_4ds_5ds37k/L15_s34_v3_04b_xattn_v2backbone_4ds/step_20000.pt
|
| 21 |
+
start_step: 20000
|
| 22 |
+
max_steps: 40000
|
| 23 |
+
skip_eval: false
|
| 24 |
+
wandb: true
|
| 25 |
+
wandb_project: world2hand
|
| 26 |
+
wandb_run_name: cached_4ds_5ds37k_v3_04b_loss_l2_4ds_resume40k
|
| 27 |
+
lr: 0.0002
|
| 28 |
+
batch_size: 8
|
| 29 |
+
lr_schedule: cosine
|
| 30 |
+
warmup_steps: 200
|
| 31 |
+
lr_min_ratio: 0.05
|
| 32 |
+
max_grad_norm: 1.0
|
| 33 |
+
log_every: 10
|
| 34 |
+
eval_every: 5000
|
| 35 |
+
eval_subset_seed: 0
|
| 36 |
+
periodic_test_render_n: 0
|
| 37 |
+
train_num_workers: 4
|
| 38 |
+
dataloader_prefetch_factor: 4
|
| 39 |
+
dataloader_pin_memory: false
|
| 40 |
+
preload_ram_threshold: 50
|
| 41 |
+
preload_train: false
|
| 42 |
+
vace_layer: 15
|
| 43 |
+
vace_capture_step: 34
|
| 44 |
+
presence_upsample_ratio: 0.0
|
| 45 |
+
presence_dedup_window: 8
|
| 46 |
+
presence_partial_threshold_frames: 10
|
| 47 |
+
v2_td_hidden_dim: 512
|
| 48 |
+
v2_td_num_shared_decoder_layers: 4
|
| 49 |
+
v2_td_num_cls_decoder_layers: 1
|
| 50 |
+
v2_td_num_reg_decoder_layers: 2
|
| 51 |
+
v2_td_aux_loss: false
|
| 52 |
+
v2_td_cam_intrinsics_mode: ray
|
| 53 |
+
td_heatmap_n_joints: 21
|
| 54 |
+
td_heatmap_origin_joint_idx: 9
|
| 55 |
+
td_heatmap_range_margin: 0.15
|
| 56 |
+
td_heatmap_pnp_min_visible: 1
|
| 57 |
+
m2dp_enable_per_hand_oos: true
|
| 58 |
+
td_xattn_num_heads: 8
|
| 59 |
+
td_xattn_dropout: 0.0
|
| 60 |
+
td_xattn_bidirectional: true
|
| 61 |
+
td_xattn_use_offset_mlp: true
|
| 62 |
+
vace_num_inference_steps: 25
|
| 63 |
+
eval_batch_size: 16
|
| 64 |
+
use_geodesic: true
|
| 65 |
+
go_loss_weight: 0.2
|
| 66 |
+
hp_loss_weight: 0.2
|
| 67 |
+
betas_loss_weight: 0.1
|
| 68 |
+
trans_loss_weight: 3.0
|
| 69 |
+
joints_loss_weight: 3.5
|
| 70 |
+
reproj_loss_weight: 1.0
|
| 71 |
+
exists_2d_loss_weight: 0.5
|
| 72 |
+
exists_3d_loss_weight: 0.0
|
| 73 |
+
is_right_loss_weight: 0.5
|
| 74 |
+
velocity_smooth_weight: 0.3
|
| 75 |
+
betas_consistency_weight: 0.05
|
| 76 |
+
aux_loss_weight: 0.0
|
| 77 |
+
anchor2d_direct_weight: 0.0
|
| 78 |
+
joints2d_direct_weight: 2.0
|