Upload 33 files
- .gitattributes +40 -35
- README.md +13 -13
- app.py +326 -0
- assets/MagicArticulate_teaser.gif +3 -0
- assets/ar_demo.gif +3 -0
- assets/articulation-xl2.0.png +3 -0
- assets/data_statistics.png +0 -0
- assets/sequence_ordering_demo.gif +3 -0
- assets/skeleton_compare.png +0 -0
- data_utils/README.md +43 -0
- data_utils/clean_skin_in_npz.py +95 -0
- data_utils/convert_npz_to_mesh_rig.py +107 -0
- data_utils/data_loader.py +121 -0
- data_utils/examples/0a59c5ffa4a1476bac6d540b79947f31.obj +0 -0
- data_utils/examples/0a59c5ffa4a1476bac6d540b79947f31.txt +0 -0
- data_utils/examples/0a59c5ffa4a1476bac6d540b79947f31_render_results.png +3 -0
- data_utils/issue_data_list.txt +123 -0
- data_utils/pyrender_wrapper.py +135 -0
- data_utils/read_npz.py +43 -0
- data_utils/read_rig_mesh_from_glb.py +198 -0
- data_utils/render_data.py +61 -0
- data_utils/save_npz.py +256 -0
- data_utils/update_npz_rm_issue_data.py +59 -0
- demo.py +214 -0
- demo.sh +4 -0
- download.py +19 -0
- requirements.txt +37 -0
- skeleton_models/shape_opt.py +406 -0
- skeleton_models/skeletongen.py +198 -0
- utils/eval_utils.py +57 -0
- utils/mesh_to_pc.py +84 -0
- utils/save_utils.py +578 -0
- utils/skeleton_data_loader.py +97 -0
.gitattributes
CHANGED
@@ -1,35 +1,40 @@
-*.7z filter=lfs diff=lfs merge=lfs -text
-*.arrow filter=lfs diff=lfs merge=lfs -text
-*.bin filter=lfs diff=lfs merge=lfs -text
-*.bz2 filter=lfs diff=lfs merge=lfs -text
-*.ckpt filter=lfs diff=lfs merge=lfs -text
-*.ftz filter=lfs diff=lfs merge=lfs -text
-*.gz filter=lfs diff=lfs merge=lfs -text
-*.h5 filter=lfs diff=lfs merge=lfs -text
-*.joblib filter=lfs diff=lfs merge=lfs -text
-*.lfs.* filter=lfs diff=lfs merge=lfs -text
-*.mlmodel filter=lfs diff=lfs merge=lfs -text
-*.model filter=lfs diff=lfs merge=lfs -text
-*.msgpack filter=lfs diff=lfs merge=lfs -text
-*.npy filter=lfs diff=lfs merge=lfs -text
-*.npz filter=lfs diff=lfs merge=lfs -text
-*.onnx filter=lfs diff=lfs merge=lfs -text
-*.ot filter=lfs diff=lfs merge=lfs -text
-*.parquet filter=lfs diff=lfs merge=lfs -text
-*.pb filter=lfs diff=lfs merge=lfs -text
-*.pickle filter=lfs diff=lfs merge=lfs -text
-*.pkl filter=lfs diff=lfs merge=lfs -text
-*.pt filter=lfs diff=lfs merge=lfs -text
-*.pth filter=lfs diff=lfs merge=lfs -text
-*.rar filter=lfs diff=lfs merge=lfs -text
-*.safetensors filter=lfs diff=lfs merge=lfs -text
-saved_model/**/* filter=lfs diff=lfs merge=lfs -text
-*.tar.* filter=lfs diff=lfs merge=lfs -text
-*.tar filter=lfs diff=lfs merge=lfs -text
-*.tflite filter=lfs diff=lfs merge=lfs -text
-*.tgz filter=lfs diff=lfs merge=lfs -text
-*.wasm filter=lfs diff=lfs merge=lfs -text
-*.xz filter=lfs diff=lfs merge=lfs -text
-*.zip filter=lfs diff=lfs merge=lfs -text
-*.zst filter=lfs diff=lfs merge=lfs -text
-*tfevents* filter=lfs diff=lfs merge=lfs -text
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+assets/ar_demo.gif filter=lfs diff=lfs merge=lfs -text
+assets/articulation-xl2.0.png filter=lfs diff=lfs merge=lfs -text
+assets/MagicArticulate_teaser.gif filter=lfs diff=lfs merge=lfs -text
+assets/sequence_ordering_demo.gif filter=lfs diff=lfs merge=lfs -text
+data_utils/examples/0a59c5ffa4a1476bac6d540b79947f31_render_results.png filter=lfs diff=lfs merge=lfs -text
README.md
CHANGED
@@ -1,13 +1,13 @@
----
-title: MagicArt
-emoji: 🏆
-colorFrom: blue
-colorTo: pink
-sdk: gradio
-sdk_version: 6.0.1
-app_file: app.py
-pinned: false
-short_description: obj to rig test
----
-
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+---
+title: MagicArt
+emoji: 🏆
+colorFrom: blue
+colorTo: pink
+sdk: gradio
+sdk_version: 6.0.1
+app_file: app.py
+pinned: false
+short_description: obj to rig test
+---
+
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py
ADDED
@@ -0,0 +1,326 @@
import os
import torch
import trimesh
import numpy as np
import gradio as gr
from pathlib import Path
import tempfile
import shutil

from skeleton_models.skeletongen import SkeletonGPT
from data_utils.save_npz import normalize_to_unit_cube
from utils.mesh_to_pc import MeshProcessor
from utils.save_utils import (
    pred_joints_and_bones,
    save_skeleton_to_txt,
    merge_duplicate_joints_and_fix_bones,
    save_skeleton_obj,
    save_mesh
)

# Global model variable
model = None
args_config = None

def initialize_model():
    """Initialize the model once at startup"""
    global model, args_config

    if model is not None:
        return

    print("Initializing MagicArticulate model...")

    # Create a simple args object with default parameters
    class Args:
        def __init__(self):
            self.input_pc_num = 8192
            self.num_beams = 1
            self.llm = "facebook/opt-350m"
            self.pad_id = -1
            self.n_discrete_size = 128
            self.n_max_bones = 100
            self.seed = 0
            self.precision = "fp16"
            self.pretrained_weights = "checkpoints/checkpoint_trainonv2_hier.pt"  # Default checkpoint
            self.hier_order = False

    args_config = Args()

    # Load model
    model = SkeletonGPT(args_config).cuda()

    # Load pretrained weights
    if os.path.exists(args_config.pretrained_weights):
        pkg = torch.load(args_config.pretrained_weights, map_location=torch.device("cuda" if torch.cuda.is_available() else "cpu"))
        model.load_state_dict(pkg["model"])
        model.eval()
        print("Model loaded successfully!")
    else:
        print(f"Warning: Pretrained weights not found at {args_config.pretrained_weights}")
        raise FileNotFoundError("Model checkpoint not found. Please ensure checkpoints are downloaded.")

def process_mesh(
    input_file,
    apply_marching_cubes,
    hier_order,
    octree_depth
):
    """
    Process the input mesh and generate rigging prediction

    Args:
        input_file: Uploaded mesh file (.obj, .ply, or .stl)
        apply_marching_cubes: Whether to apply marching cubes
        hier_order: Whether to use hierarchical ordering
        octree_depth: Depth for octree (if using marching cubes)

    Returns:
        Tuple of (skeleton obj file, rig txt file, normalized mesh file, status message)
    """
    try:
        # Initialize model if not already done
        if model is None:
            initialize_model()

        # Create temporary output directory
        output_dir = tempfile.mkdtemp()

        # Get file information
        file_name = Path(input_file).stem
        file_ext = Path(input_file).suffix.lower()

        # Check file type
        if file_ext not in ['.obj', '.ply', '.stl']:
            return None, None, None, f"Error: Unsupported file type {file_ext}. Please upload .obj, .ply, or .stl file."

        # Load mesh
        mesh = trimesh.load(input_file, force='mesh')

        # Convert mesh to point cloud
        print(f"Converting mesh to point cloud (apply_marching_cubes={apply_marching_cubes})...")
        pc_list = MeshProcessor.convert_meshes_to_point_clouds(
            [mesh],
            args_config.input_pc_num,
            apply_marching_cubes=apply_marching_cubes,
            octree_depth=octree_depth
        )
        pc_normal = pc_list[0]

        # Normalize point cloud
        pc_coor = pc_normal[:, :3]
        normals = pc_normal[:, 3:]
        pc_coor, center, scale = normalize_to_unit_cube(pc_coor, scale_factor=0.9995)

        pc_coor = pc_coor.astype(np.float32)
        normals = normals.astype(np.float32)

        # Calculate transform parameters
        bounds = np.array([pc_coor.min(axis=0), pc_coor.max(axis=0)])
        pc_center = (bounds[0] + bounds[1])[None, :] / 2
        pc_scale = ((bounds[1] - bounds[0]).max() + 1e-5)

        transform_params = torch.tensor([
            center[0], center[1], center[2],
            scale,
            pc_center[0][0], pc_center[0][1], pc_center[0][2],
            pc_scale
        ], dtype=torch.float32)

        # Prepare batch data
        pc_normal_tensor = torch.from_numpy(
            np.concatenate([pc_coor, normals], axis=-1).astype(np.float16)
        ).unsqueeze(0).cuda()

        batch_data = {
            'pc_normal': pc_normal_tensor,
            'file_name': [file_name],
            'transform_params': transform_params.unsqueeze(0).cuda(),
            'vertices': torch.from_numpy(mesh.vertices).unsqueeze(0).cuda(),
            'faces': torch.from_numpy(mesh.faces).unsqueeze(0).cuda()
        }

        # Generate skeleton
        print("Generating skeleton...")
        with torch.no_grad():
            pred_bone_coords = model.generate(batch_data)

        # Process predictions
        skeleton = pred_bone_coords[0].cpu().numpy()
        pred_joints, pred_bones = pred_joints_and_bones(skeleton.squeeze())

        # Post-process: merge duplicate joints
        if hier_order:
            pred_root_index = pred_bones[0][0]
            pred_joints, pred_bones, pred_root_index = merge_duplicate_joints_and_fix_bones(
                pred_joints, pred_bones, root_index=pred_root_index
            )
        else:
            pred_joints, pred_bones = merge_duplicate_joints_and_fix_bones(pred_joints, pred_bones)
            pred_root_index = None

        # Denormalize joints for rig file
        transform_params_np = transform_params.cpu().numpy()
        trans = transform_params_np[:3]
        scale_val = transform_params_np[3]
        pc_trans = transform_params_np[4:7]
        pc_scale_val = transform_params_np[7]

        pred_joints_denorm = pred_joints * pc_scale_val + pc_trans
        pred_joints_denorm = pred_joints_denorm / scale_val + trans

        # Save outputs
        skel_obj_path = os.path.join(output_dir, f'{file_name}_skel.obj')
        rig_txt_path = os.path.join(output_dir, f'{file_name}_pred.txt')
        mesh_obj_path = os.path.join(output_dir, f'{file_name}_mesh.obj')

        # Save skeleton
        save_skeleton_obj(
            pred_joints,
            pred_bones,
            skel_obj_path,
            pred_root_index if hier_order else None,
            use_cone=hier_order
        )

        # Save rig
        vertices_np = mesh.vertices
        save_skeleton_to_txt(
            pred_joints_denorm,
            pred_bones,
            pred_root_index,
            hier_order,
            vertices_np,
            rig_txt_path
        )

        # Save normalized mesh
        vertices_norm = (vertices_np - trans) * scale_val
        vertices_norm = (vertices_norm - pc_trans) / pc_scale_val
        save_mesh(vertices_norm, mesh.faces, mesh_obj_path)

        status_msg = f"✅ Success! Generated skeleton with {len(pred_joints)} joints and {len(pred_bones)} bones."

        return skel_obj_path, rig_txt_path, mesh_obj_path, status_msg

    except Exception as e:
        import traceback
        error_msg = f"❌ Error processing mesh: {str(e)}\n{traceback.format_exc()}"
        print(error_msg)
        return None, None, None, error_msg

# Create Gradio interface
def create_interface():
    """Create the Gradio interface"""

    with gr.Blocks(title="MagicArticulate - 3D Model Rigging") as demo:
        gr.Markdown("""
        # 🪄 MagicArticulate: Make Your 3D Models Articulation-Ready

        Upload a 3D mesh (.obj, .ply, or .stl) to automatically generate skeletal rigging.

        **Paper**: [CVPR 2025] MagicArticulate ([Project Page](https://chaoyuesong.github.io/MagicArticulate/))
        """)

        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("### Input")
                input_file = gr.File(
                    label="Upload 3D Mesh",
                    file_types=[".obj", ".ply", ".stl"],
                    type="filepath"
                )

                gr.Markdown("### Options")
                apply_marching_cubes = gr.Checkbox(
                    label="Apply Marching Cubes",
                    value=False,
                    info="Apply marching cubes for mesh processing (slower but more accurate)"
                )

                hier_order = gr.Checkbox(
                    label="Hierarchical Ordering",
                    value=False,
                    info="Use hierarchical sequence ordering for skeleton generation"
                )

                octree_depth = gr.Slider(
                    minimum=5,
                    maximum=9,
                    value=7,
                    step=1,
                    label="Octree Depth",
                    info="Depth for octree (only used if Marching Cubes is enabled)"
                )

                generate_btn = gr.Button("🚀 Generate Rigging", variant="primary", size="lg")

            with gr.Column(scale=1):
                gr.Markdown("### Output")
                status_text = gr.Textbox(
                    label="Status",
                    lines=3,
                    interactive=False
                )

                skel_output = gr.File(
                    label="📥 Skeleton (.obj)",
                    interactive=False
                )

                rig_output = gr.File(
                    label="📥 Rig Prediction (.txt)",
                    interactive=False
                )

                mesh_output = gr.File(
                    label="📥 Normalized Mesh (.obj)",
                    interactive=False
                )

        gr.Markdown("""
        ### About
        MagicArticulate automatically generates skeletal structures for 3D models, making them ready for animation.
        The system predicts joint positions and bone connections using a transformer-based approach.

        **Outputs**:
        - **Skeleton (.obj)**: 3D visualization of the generated skeleton
        - **Rig Prediction (.txt)**: Detailed rigging information (joints, bones, hierarchy)
        - **Normalized Mesh (.obj)**: The input mesh normalized to unit cube

        **Citation**:
        ```
        @inproceedings{song2025magicarticulate,
          title={MagicArticulate: Make Your 3D Models Articulation-Ready},
          author={Song, Chaoyue and others},
          booktitle={CVPR},
          year={2025}
        }
        ```
        """)

        # Connect the button to the processing function
        generate_btn.click(
            fn=process_mesh,
            inputs=[input_file, apply_marching_cubes, hier_order, octree_depth],
            outputs=[skel_output, rig_output, mesh_output, status_text]
        )

    return demo

if __name__ == "__main__":
    # Initialize model at startup
    try:
        initialize_model()
    except Exception as e:
        print(f"Warning: Could not initialize model at startup: {e}")
        print("Model will be initialized on first request.")

    # Launch Gradio app
    demo = create_interface()
    demo.queue()
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False
    )
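The app above is plain Python, so the same pipeline can be driven without the Gradio UI. A minimal sketch, assuming `app.py` is importable, the checkpoint is in place, and a CUDA device is available; the mesh path points at the example shipped under `data_utils/examples/`:

```python
# Minimal sketch: call the Space's pipeline directly (no UI).
from app import initialize_model, process_mesh

initialize_model()  # loads SkeletonGPT and the pretrained weights once
skel_path, rig_path, mesh_path, status = process_mesh(
    "data_utils/examples/0a59c5ffa4a1476bac6d540b79947f31.obj",
    apply_marching_cubes=False,  # keep the original surface
    hier_order=False,            # spatial ordering, as in the default checkpoint
    octree_depth=7,              # only used when marching cubes is enabled
)
print(status)
```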
assets/MagicArticulate_teaser.gif
ADDED
Git LFS Details

assets/ar_demo.gif
ADDED
Git LFS Details

assets/articulation-xl2.0.png
ADDED
Git LFS Details

assets/data_statistics.png
ADDED

assets/sequence_ordering_demo.gif
ADDED
Git LFS Details

assets/skeleton_compare.png
ADDED
data_utils/README.md
ADDED
@@ -0,0 +1,43 @@
## Preprocessed data
We provide the preprocessed data saved in NPZ files, which contain the following information:
```
'vertices', 'faces', 'normals', 'joints', 'bones', 'root_index', 'uuid', 'pc_w_norm', 'joint_names', 'skinning_weights_value', 'skinning_weights_row', 'skinning_weights_col', 'skinning_weights_shape'
```
You can check `read_npz.py` for how to read the NPZ files and `save_npz.py` for how we save them.

Before saving them into NPZ files, we extract the mesh (.obj) and rig (.txt) from 3D models downloaded from Objaverse-XL using Blender. The rig file follows the format used in [RigNet](https://github.com/zhan-xu/RigNet), which includes the following entries:
```
joints [joint_name] [x] [y] [z]
root [root_joint_name]
skin [vertex_index] [joint_name1] [skinning_weight1] [joint_name2] [skinning_weight2] ...
hier [parent_joint_name] [child_joint_name]
```
For an example, please see `examples/0a59c5ffa4a1476bac6d540b79947f31.txt`.

To convert an NPZ file back to OBJ and TXT files, we give an example; run:
```
python convert_npz_to_mesh_rig.py
```

## Visualization
We provide a method for visualizing 3D models with their skeletons using [Pyrender](https://github.com/mmatl/pyrender), modified from [Lab4D](https://github.com/lab4d-org/lab4d/tree/ppr/). This visualization also serves as input to the VLM for skeleton quality rating. Make sure you have installed the following packages before running the visualization:
```
pip install trimesh opencv-python pyrender
```

We provide an example to demonstrate the process. For this example, we prepare an OBJ file along with a TXT file containing the rigging information. Then run:
```
python render_data.py
```
You will obtain the following outputs:

<p align="center">
<img width="80%" src="examples/0a59c5ffa4a1476bac6d540b79947f31_render_results.png"/>
</p>

### Reading rig and mesh from GLBs
We provide the script we use for extracting the rig (.txt) and mesh (.obj) from GLB files. You can run:
```
python read_rig_mesh_from_glb.py
```
Remember to download Blender (we use 4.2.0) and install bpy in your conda environment.
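The rig TXT format above is line-oriented, so a parser is only a few lines of Python. The helper below is a hypothetical sketch (not a script in this repo) that collects the four record types:

```python
# Hypothetical parser for the RigNet-style rig TXT described above.
def parse_rig_txt(path):
    joints, hier, skin, root = {}, [], {}, None
    with open(path) as f:
        for line in f:
            parts = line.split()
            if not parts:
                continue
            if parts[0] == "joints":
                joints[parts[1]] = tuple(map(float, parts[2:5]))
            elif parts[0] == "root":
                root = parts[1]
            elif parts[0] == "skin":
                vidx = int(parts[1])
                pairs = parts[2:]
                skin[vidx] = {pairs[i]: float(pairs[i + 1]) for i in range(0, len(pairs), 2)}
            elif parts[0] == "hier":
                hier.append((parts[1], parts[2]))
    return joints, root, skin, hier
```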
data_utils/clean_skin_in_npz.py
ADDED
@@ -0,0 +1,95 @@
# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import scipy.sparse as sp
import os

def check_and_clean_skinning_weights(file_path, output_path, tolerance=0.1):
    """
    Check if all rows in pc_skinning_weights sum to 1 for each item in the NPZ file.
    Remove invalid items and save a cleaned version.

    Args:
        file_path: Path to the input NPZ file
        output_path: Path for the cleaned NPZ file
        tolerance: Tolerance for floating point comparison

    Returns:
        tuple: (cleaned_data_list, removed_indices)
    """
    data_list = np.load(file_path, allow_pickle=True)['arr_0']

    invalid_indices = []
    valid_data_list = []

    for idx, data in enumerate(data_list):
        is_valid = True

        weights_data = data['skinning_weights_value']
        weights_row = data['skinning_weights_row']
        weights_col = data['skinning_weights_col']
        weights_shape = data['skinning_weights_shape']

        skinning_sparse = sp.coo_matrix(
            (weights_data, (weights_row, weights_col)),
            shape=weights_shape
        )

        skinning_csr = skinning_sparse.tocsr()
        row_sums = np.array(skinning_csr.sum(axis=1)).flatten()

        invalid_rows = np.where(np.abs(row_sums - 1.0) > tolerance)[0]

        if len(invalid_rows) > 0:
            min_sum = np.min(row_sums)
            max_sum = np.max(row_sums)
            invalid_indices.append((data['uuid'], f"{len(invalid_rows)} rows, range: [{min_sum:.6f}, {max_sum:.6f}]"))
            is_valid = False

        if is_valid:
            valid_data_list.append(data)

    # Save the cleaned data (np.savez_compressed pickles object arrays on its own;
    # it takes no allow_pickle keyword, so none is passed here)
    if valid_data_list:
        np.savez_compressed(output_path, valid_data_list)
        print(f"Saved {len(valid_data_list)} valid items to {output_path}")

    return valid_data_list, invalid_indices

def main():
    # File paths
    file_path = "articulation_xlv2_train.npz"  # "articulation_xlv2_test.npz"
    log_file = "invalid_skinning_weights_intrain.txt"  # "invalid_skinning_weights_intest.txt"
    output_path = "articulation_xlv2_train_updated.npz"  # "articulation_xlv2_test_updated.npz"

    # Clean the data
    valid_data, invalid_indices = check_and_clean_skinning_weights(file_path, output_path)

    # Log the results
    with open(log_file, "w") as f:
        f.write(f"Original file: {file_path}\n")
        f.write(f"Cleaned file: {output_path}\n")
        f.write(f"Total items: {len(np.load(file_path, allow_pickle=True)['arr_0'])}\n")
        f.write(f"Valid items: {len(valid_data)}\n")
        f.write(f"Removed items: {len(invalid_indices)}\n\n")

        if invalid_indices:
            f.write("Details of removed items:\n")
            for idx, details in invalid_indices:
                f.write(f"  Index {idx}: {details}\n")

    print(f"Cleaning complete. Results written to {log_file}")

if __name__ == "__main__":
    main()
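The script above discards any model whose skinning rows fail the sum-to-one check. An alternative for rows that are only slightly off is to renormalize them instead of dropping the whole item; a sketch of that option (not what the script does):

```python
import numpy as np
import scipy.sparse as sp

def renormalize_rows(csr: sp.csr_matrix) -> sp.csr_matrix:
    """Rescale each row of a sparse skinning matrix to sum to 1."""
    sums = np.asarray(csr.sum(axis=1)).flatten()
    sums[sums == 0] = 1.0  # leave all-zero rows unchanged
    return (sp.diags(1.0 / sums) @ csr).tocsr()
```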
data_utils/convert_npz_to_mesh_rig.py
ADDED
@@ -0,0 +1,107 @@
# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
You can convert an npz file back to obj (mesh) and txt (rig) files using this python script.
"""
import os
import numpy as np
import scipy.sparse as sp

def export_obj(vertices, faces, normals, output_path):
    with open(output_path, 'w') as f:
        for v in vertices:
            f.write(f"v {v[0]} {v[1]} {v[2]}\n")
        for n in normals:
            f.write(f"vn {n[0]} {n[1]} {n[2]}\n")
        for i, face in enumerate(faces):
            # OBJ format is 1-based, so we add 1 to all indices
            f.write(f"f {face[0]+1}//{face[0]+1} {face[1]+1}//{face[1]+1} {face[2]+1}//{face[2]+1}\n")

def export_rig_txt(joints, bones, root_index, joint_names, skinning_weights, output_path):
    """
    joints [joint_name] [x] [y] [z]
    root [root_joint_name]
    skin [vertex_index] [joint_name1] [weight1] [joint_name2] [weight2] ...
    hier [parent_joint_name] [child_joint_name]
    """
    n_joints = len(joints)
    n_verts = skinning_weights.shape[0]  # (n_vertex, n_joints)

    with open(output_path, 'w') as f:
        # 1) joints
        for i in range(n_joints):
            x, y, z = joints[i]
            jn = joint_names[i]
            f.write(f"joints {jn} {x} {y} {z}\n")

        # 2) root
        root_name = joint_names[root_index]
        f.write(f"root {root_name}\n")

        # 3) skin
        for vidx in range(n_verts):
            row_weights = skinning_weights[vidx]
            non_zero_indices = np.where(row_weights != 0)[0]
            if len(non_zero_indices) == 0:
                continue

            line_parts = [f"skin {vidx}"]  # vertex_idx
            for jidx in non_zero_indices:
                w = row_weights[jidx]
                jn = joint_names[jidx]
                line_parts.append(jn)
                line_parts.append(str(w))

            f.write(" ".join(line_parts) + "\n")

        # 4) hier
        for p_idx, c_idx in bones:
            p_name = joint_names[p_idx]
            c_name = joint_names[c_idx]
            f.write(f"hier {p_name} {c_name}\n")

if __name__ == "__main__":

    data = np.load('articulation_xlv2_test.npz', allow_pickle=True)
    data_list = data['arr_0']

    print(f"Loaded {len(data_list)} data entries")

    model_data = data_list[0]
    print("Data keys:", model_data.keys())
    # 'vertices', 'faces', 'normals', 'joints', 'bones', 'root_index', 'uuid', 'joint_names',
    # 'skinning_weights_value', 'skinning_weights_row', 'skinning_weights_col', 'skinning_weights_shape'

    vertices = model_data['vertices']        # (n_vertex, 3)
    faces = model_data['faces']              # (n_faces, 3)
    normals = model_data['normals']          # (n_vertex, 3)
    joints = model_data['joints']            # (n_joints, 3)
    bones = model_data['bones']              # (n_bones, 2)
    root_index = model_data['root_index']    # int
    joint_names = model_data['joint_names']  # list of str
    uuid_str = model_data['uuid']

    skin_val = model_data['skinning_weights_value']
    skin_row = model_data['skinning_weights_row']
    skin_col = model_data['skinning_weights_col']
    skin_shape = model_data['skinning_weights_shape']
    skin_sparse = sp.coo_matrix((skin_val, (skin_row, skin_col)), shape=skin_shape)
    skinning_weights = skin_sparse.toarray()  # (n_vertex, n_joints)

    obj_path = f"{uuid_str}.obj"
    export_obj(vertices, faces, normals, obj_path)
    rig_txt_path = f"{uuid_str}.txt"
    export_rig_txt(joints, bones, root_index, joint_names, skinning_weights, rig_txt_path)

    print("Done!")
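A quick way to sanity-check the round trip is to reload the exported OBJ and compare element counts; a sketch assuming trimesh is installed, with `uuid_str`, `vertices`, and `faces` as in the script above:

```python
import trimesh

# Reload the exported OBJ and confirm the geometry survived the export.
mesh = trimesh.load(f"{uuid_str}.obj", process=False)
assert len(mesh.vertices) == len(vertices), "vertex count changed in export"
assert len(mesh.faces) == len(faces), "face count changed in export"
```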
data_utils/data_loader.py
ADDED
@@ -0,0 +1,121 @@
# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import glob
import numpy as np
import trimesh

class DataLoader:
    def __init__(self):
        self.joint_name_to_idx = {}
        self.root_name = None  # set by load_rig_data when a 'root' line is present

    def load_rig_data(self, rig_path):
        joints = []
        joints_names = []
        bones = []

        with open(rig_path, 'r') as f:
            for line in f:
                parts = line.strip().split()
                if not parts:  # skip blank lines
                    continue
                if parts[0] == 'joints':
                    joint_name = parts[1]
                    joint_pos = [float(parts[2]), float(parts[3]), float(parts[4])]
                    self.joint_name_to_idx[joint_name] = len(joints)
                    joints.append(joint_pos)
                    joints_names.append(joint_name)
                elif parts[0] == 'root':
                    self.root_name = parts[1]
                elif parts[0] == 'hier':
                    parent_joint = self.joint_name_to_idx[parts[1]]
                    child_joint = self.joint_name_to_idx[parts[2]]
                    bones.append([parent_joint, child_joint])

        self.joints = np.array(joints)
        self.bones = np.array(bones)
        self.joints_names = joints_names
        self.root_idx = None
        if self.root_name is not None:
            self.root_idx = self.joint_name_to_idx[self.root_name]

    def load_mesh(self, mesh_path):
        mesh = trimesh.load(mesh_path, process=False)
        mesh.visual.vertex_colors[:, 3] = 100  # set transparency
        self.mesh = mesh

        # Compute the centroid normal of the mesh
        v = self.mesh.vertices
        xmin, ymin, zmin = v.min(axis=0)
        xmax, ymax, zmax = v.max(axis=0)
        self.bbox_center = np.array([(xmax + xmin)/2, (ymax + ymin)/2, (zmax + zmin)/2])
        self.bbox_size = np.array([xmax - xmin, ymax - ymin, zmax - zmin])
        self.bbox_scale = max(xmax - xmin, ymax - ymin, zmax - zmin)

        normal = mesh.center_mass - self.bbox_center
        normal = normal / (np.linalg.norm(normal)+1e-5)

        # Choose axis order based on normal direction
        if abs(normal[1]) > abs(normal[2]):  # if Y component is dominant
            self.axis_order = [0, 1, 2]  # keep default order
        else:
            self.axis_order = [0, 2, 1]  # swap Y and Z

        self.mesh.vertices = self.mesh.vertices[:, self.axis_order]
        self.joints = self.joints[:, self.axis_order]
        self.normalize_coordinates()

    def normalize_coordinates(self):

        # Compute scale and offset
        scale = 1.0 / (self.bbox_scale+1e-5)
        offset = -self.bbox_center

        self.mesh.vertices = (self.mesh.vertices + offset) * scale
        self.joints = (self.joints + offset) * scale

        # Fixed radii that work at the normalized (unit) scale
        self.joint_radius = 0.01
        self.bone_radius = 0.005

    def query_mesh_rig(self):

        input_dict = {"shape": self.mesh}

        # Create joints as spheres
        joint_meshes = []
        for i, joint in enumerate(self.joints):

            sphere = trimesh.creation.icosphere(
                radius=self.joint_radius, subdivisions=2
            )
            sphere.apply_translation(joint)
            if i == self.root_idx:
                sphere.visual.vertex_colors = [0, 255, 0, 255]  # root joint in green
            else:
                sphere.visual.vertex_colors = [0, 0, 255, 255]  # other joints in blue

            joint_meshes.append(sphere)
        input_dict["joint_meshes"] = trimesh.util.concatenate(joint_meshes)

        # Create bones as cylinders
        bone_meshes = []
        for bone in self.bones:
            start, end = self.joints[bone[0]], self.joints[bone[1]]
            cylinder = trimesh.creation.cylinder(radius=self.bone_radius, segment=np.array([[0, 0, 0], end - start]))
            cylinder.apply_translation(start)
            cylinder.visual.vertex_colors = [255, 0, 0, 255]  # bones in red
            bone_meshes.append(cylinder)
        input_dict["bone_meshes"] = trimesh.util.concatenate(bone_meshes)

        return input_dict
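Typical usage pairs `DataLoader` with the renderer in `pyrender_wrapper.py` (as `render_data.py` does); a minimal sketch against the bundled example assets, assuming the files exist at these paths:

```python
# Sketch: build the render dict for the bundled example.
loader = DataLoader()
loader.load_rig_data("examples/0a59c5ffa4a1476bac6d540b79947f31.txt")
loader.load_mesh("examples/0a59c5ffa4a1476bac6d540b79947f31.obj")
scene = loader.query_mesh_rig()  # {"shape", "joint_meshes", "bone_meshes"}
```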
data_utils/examples/0a59c5ffa4a1476bac6d540b79947f31.obj
ADDED
The diff for this file is too large to render. See raw diff.

data_utils/examples/0a59c5ffa4a1476bac6d540b79947f31.txt
ADDED
The diff for this file is too large to render. See raw diff.

data_utils/examples/0a59c5ffa4a1476bac6d540b79947f31_render_results.png
ADDED
Git LFS Details
data_utils/issue_data_list.txt
ADDED
@@ -0,0 +1,123 @@
0b1f1ccb-db41-5689-b363-fd8ca0145041
d4705a2d-2dbf-5175-9fd0-b0cc538b9c4d
12b3d88d-2845-57b7-b483-d3a766beeb0e
778505b7-63da-5c08-bad7-6935fcd73cec
35ed271f-e9d7-528f-b165-e25004ef802b
0096279cc46c4d1d8e8611e611e2418b
00ea25ccad8344cbaedc89d70bb75a49
08b617be44b6466584ba9624f857222c
0998722861ba489695ad8bd4456e76e6
0bd786e936774176ac474694b0f6f876
0c1a7657bea0421dadef56e2080f0297
1073c44309524810b6cd4cef2d6e8008
10b9c6e9bf214dc39476161dfe2eaa8a
147df2ee69df488eb6cb2f88f2f703bb
18ff6fa66b0d483a8758e4602e5b70b0
1cf88736c59a43c88ba7dac44c929dab
1e9544eea98d417db87347dcc16cb69e
21a4bc038cbd415b8e09566148c87c46
2809e172066d4140b1ddc9356490191a
28483d55555f433d8fde4ba141ad5271
31829af6c72146519d348a6d4d2bcc8b
32202338cd5c40beace31deeacd598e5
37fe21828c37413986a07a1bf8c75c93
3857965c400c47c9a846c01eb1f36ed5
404e622bdfd14ab693640ff86c131973
44f8486a0b2c4f9489fc3912b2dcf880
49580a36b07d47808aa91db6e2b9fcdd
4db51555e8fd48a0905ecee93730f863
57a9d6f9fec7430bae67d7d7a9bfdd2c
593eeb44d67c49499d3580d908b9f5cd
5a571bea2d0c4ad5b2cc912c3dc37a59
5cd1f275bdb34d939ffaa07a641a2eef
60ab9787fde64199ab59b728276b5cd8
63453d744e3844d48bc9a7bedfe586a7
6caf784e33084b1389fdea4043560d3f
725ce5eae96b4602a3b8a30f73dcbc4c
7f9c3d9ccbd949449f25f3711780c1e7
80ff2e88de2144bbb21d231db5a02000
835174fcce4a4969851ca1846b92036a
85b73c92393e453faf0f7ec82d40720e
860911c447744c0396b618db994c535e
86d6d90704ff4e9c8fc0f0751bd837a2
934b27da5e4249978bfa9c190ec01f9a
968aecc8c38246f8af3d0d7fa169ca8f
9fc1cb45c8404517aa8cee3bb47c14fd
a65a935fd54b4159a2687bffef7cbf81
af2f7b1678ea4194a9b8235e7dfd23b3
b4cd213509ec4dcba41a280b4b013e63
be7a64227e1f4f13b86389edc4926dfa
bff3cd47d0574f73980b3af9f7790c58
c8ac24a9bf2647fb9e7565eaf3a28558
cc1f905b148c4378ad46a40da72e839f
ce50fe2e6a654a3bafab950c0f101e59
d270505df059467e8fa17974f075f3cf
d476d6bfc0364001a6cc73877a59ca65
d9a5b67b5c9142e984f76b1afec1939b
da9cb8ac53274b9bbd9467b7d83c85fb
dc48f3ab2b2844eba788898509a52806
e1817fcc5d614723bcb1f49491fe3ed0
f1fbc33234374c3a911148a453399186
faab16de19484746a4716cb00b738f8e
fdb767e69a0748c6bcdfe8764772c0d4
ff8ec56b0c664b438d36e84882b304f4
03ea3bf9d47e4e5789d027279e6edbbb
064a05ca3df84e3fbf900f9a1df75577
0ada42e959504b47ba58ca331a8d8549
112ae8160af54eeea6b2483b903634f4
156d6ab3d495476c997887c092aff781
1c92543b1e9245e0a2c1e3770a0e3d11
1e041df547e64db9aaa8d79218d880a8
1e34fd79cbb24db4952db6e9642881d3
1ec08e1e74d04354ac7085c004b01c2c
20dd7f7bdc9a4c36aef491f12afa14d8
242e99d9fe2f4eec91841fd3e8b01021
27dbf22159a5464687f4ed9b347257d3
28647ae054d74d2e9cac4a3dda31bb55
29ff70f5772747f89b0db4aae9c0ade6
2b03620bba824c1ea67945abd5c043f2
314d74658df6431ea50bede8512882cc
38f052a2027346e2943b4c76d2572415
3dbaadb244e44f59b5a6b7490aac6883
400dbd97e4e6429cab24fab8b5a3d845
41790f8edba642ffa281a0660f318db4
4c60ff4ebef241deae699ec8d2de86b5
5de63c02a4374605acb69691450e6653
65df530434624400b030da4579baa4b6
66c66c960e1c4b3aab5f2792f5e71add
6abf66991f584f1ba45d7297f3a128d4
6dd6b05e20604f478d9fd868528b275f
6f76008a68074d2bb59a0189f558ae34
8bb433dfbef3479cbaa3bcdf63b5b6a2
9338c7dbf4054c608c17353358cdb7c6
9544bb7b09874f13a5ecd0429379cbd8
95d2df27650f4beb8d208a21db7366d9
96d50c0f7f6a40ad9e5ae39537d1062e
9e7e71c08e5b4ff9b510afbfb2067152
a6cce2749dfb4b4d89c0dc3460ea9d3b
ab7e81a8a26d43ecb3131729a999ddcd
adae06ba4b7a4cbeab892957bc40331b
ba46772fa0234625832da0582c2f615c
c4f57ce4bc2b4c46a32414515ba991e9
cf09886dc98f4666bed77d6b51a4ef67
cfde2bfa5c634a788c2c4c4480f53ba7
d0008363ca6c4ea9976494eff45e90bb
d403eef8a45d485e905b968cc0a1670a
dc8d45c7ae7f453e9f861c79a40d9265
eb8e71b3a22f4e719d8157831c408a6e
ed896088728f4779b2fd9aa7f527e880
f06a196aea294b0fa05dee4be971a12c
f3e1bd29da234c8e89e0f208487fe31c
f84ffc38cbb9400ca31be98fe89abb01
fa31faff8ec04fa49e72e6266dc14cc4
fb6bd558e5ff4d3b8709a39d6280460b
808f9ffa-c14a-5d78-b8bf-197bc1f0b29c
e1740d44-9be4-58cf-a3e6-f8208b9cdfc6
4acf0253-00b8-5cca-be94-1f2af5bd72ba
0c94fe68-2983-52db-822e-6ea63bd54f65
ff9b4de9-a702-5221-bc26-f0c7ec8c4c51
b927ce627b6841a688067331853302d6
ccfad91e-e66d-5cc3-aff8-99f5b3a824fd
25434b7c-4ab4-58cd-900f-aa1bfcf53233
23d9764b-5035-5025-aae1-2788c1942a7c
ecbc08ea-5f9d-5d2f-a496-77ec128bd3fe
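This list is presumably what `update_npz_rm_issue_data.py` consumes; in any case, filtering amounts to a set lookup on each entry's `uuid`. A sketch, assuming `data_list` was loaded as in `read_npz.py`:

```python
# Sketch: drop the problematic UUIDs listed above from a loaded data list.
with open("data_utils/issue_data_list.txt") as f:
    issue_ids = {line.strip() for line in f if line.strip()}

clean_list = [d for d in data_list if d["uuid"] not in issue_ids]
```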
data_utils/pyrender_wrapper.py
ADDED
@@ -0,0 +1,135 @@
# Modified from https://github.com/lab4d-org/lab4d

import os
import numpy as np
import cv2
import pyrender
import trimesh
from pyrender import (
    IntrinsicsCamera,
    Mesh,
    Node,
    Scene,
    OffscreenRenderer,
    MetallicRoughnessMaterial,
    RenderFlags
)

os.environ["PYOPENGL_PLATFORM"] = "egl"

def look_at(eye, center, up):
    """Create a look-at (view) matrix."""
    f = np.array(center, dtype=np.float32) - np.array(eye, dtype=np.float32)
    f /= np.linalg.norm(f)

    u = np.array(up, dtype=np.float32)
    u /= np.linalg.norm(u)

    s = np.cross(f, u)
    u = np.cross(s, f)

    m = np.identity(4, dtype=np.float32)
    m[0, :3] = s
    m[1, :3] = u
    m[2, :3] = -f
    m[:3, 3] = -np.matmul(m[:3, :3], np.array(eye, dtype=np.float32))

    return m

class PyRenderWrapper:
    def __init__(self, image_size=(1024, 1024)) -> None:
        # renderer
        self.image_size = image_size
        render_size = max(image_size)
        self.r = OffscreenRenderer(render_size, render_size)
        self.intrinsics = IntrinsicsCamera(
            render_size, render_size, render_size / 2, render_size / 2
        )
        # light
        self.light_pose = np.eye(4)
        self.set_light_topdown()
        self.direc_l = pyrender.DirectionalLight(color=np.ones(3), intensity=5.0)
        self.material = MetallicRoughnessMaterial(
            roughnessFactor=0.75, metallicFactor=0.75, alphaMode="BLEND"
        )
        self.init_camera()

    def init_camera(self):
        self.flip_pose = np.eye(4)
        self.set_camera(np.eye(4))

    def set_camera(self, scene_to_cam):
        # object to camera transforms
        self.scene_to_cam = self.flip_pose @ scene_to_cam

    def set_light_topdown(self, gl=False):
        # top down light, slightly closer to the camera
        if gl:
            rot = cv2.Rodrigues(np.asarray([-np.pi / 2, 0, 0]))[0]
        else:
            rot = cv2.Rodrigues(np.asarray([np.pi / 2, 0, 0]))[0]
        self.light_pose[:3, :3] = rot

    def align_light_to_camera(self):
        self.light_pose = np.linalg.inv(self.scene_to_cam)

    def set_intrinsics(self, intrinsics):
        """
        Args:
            intrinsics: (4,) fx,fy,px,py
        """
        self.intrinsics = IntrinsicsCamera(
            intrinsics[0], intrinsics[1], intrinsics[2], intrinsics[3]
        )

    def get_cam_to_scene(self):
        cam_to_scene = np.eye(4)
        cam_to_scene[:3, :3] = self.scene_to_cam[:3, :3].T
        cam_to_scene[:3, 3] = -self.scene_to_cam[:3, :3].T @ self.scene_to_cam[:3, 3]
        return cam_to_scene

    def set_camera_view(self, angle, bbox_center, distance=2.0):
        # Calculate camera position based on angle and distance from bounding box center
        camera_position = bbox_center + distance * np.array([np.sin(angle), 0, np.cos(angle)], dtype=np.float32)
        look_at_matrix = look_at(camera_position, bbox_center, [0, 1, 0])
        self.scene_to_cam = look_at_matrix @ self.flip_pose

    def render(self, input_dict):
        # Create separate scenes for transparent objects (mesh) and solid objects (joints and bones)
        scene_transparent = Scene(ambient_light=np.array([1.0, 1.0, 1.0, 1.0]) * 0.1)
        scene_solid = Scene(ambient_light=np.array([1.0, 1.0, 1.0, 1.0]) * 0.1)

        mesh_pyrender = Mesh.from_trimesh(input_dict["shape"], smooth=False)
        mesh_pyrender.primitives[0].material = self.material
        scene_transparent.add(mesh_pyrender, pose=np.eye(4), name="shape")

        if "joint_meshes" in input_dict:
            joints_pyrender = Mesh.from_trimesh(input_dict["joint_meshes"], smooth=False)
            joints_pyrender.primitives[0].material = self.material
            scene_solid.add(joints_pyrender, pose=np.eye(4), name="joints")

        if "bone_meshes" in input_dict:
            bones_pyrender = Mesh.from_trimesh(input_dict["bone_meshes"], smooth=False)
            bones_pyrender.primitives[0].material = self.material
            scene_solid.add(bones_pyrender, pose=np.eye(4), name="bones")

        # Camera for both scenes
        scene_transparent.add(self.intrinsics, pose=self.get_cam_to_scene())
        scene_solid.add(self.intrinsics, pose=self.get_cam_to_scene())

        # Light for both scenes
        scene_transparent.add(self.direc_l, pose=self.light_pose)
        scene_solid.add(self.direc_l, pose=self.light_pose)

        # Render transparent scene first
        color_transparent, depth_transparent = self.r.render(scene_transparent)

        # Render solid scene on top
        color_solid, depth_solid = self.r.render(scene_solid)

        # Combine the two scenes
        color_combined = np.where(depth_solid[..., np.newaxis] == 0, color_transparent, color_solid)

        return color_combined, depth_solid

    def delete(self):
        self.r.delete()
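A short usage sketch for the wrapper, assuming `scene` is the dict returned by `DataLoader.query_mesh_rig()`; it renders four turntable views and writes them with OpenCV (channels flipped to BGR for `imwrite`):

```python
import numpy as np
import cv2

renderer = PyRenderWrapper(image_size=(1024, 1024))
for i, angle in enumerate(np.linspace(0, 2 * np.pi, 4, endpoint=False)):
    renderer.set_camera_view(angle, bbox_center=np.zeros(3), distance=2.0)
    renderer.align_light_to_camera()  # keep the model lit from the view direction
    color, depth = renderer.render(scene)
    cv2.imwrite(f"view_{i:02d}.png", color[..., ::-1])
renderer.delete()
```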
data_utils/read_npz.py
ADDED
@@ -0,0 +1,43 @@
# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import scipy.sparse as sp

# Load the NPZ file
data = np.load('articulation_xlv2_test.npz', allow_pickle=True)
data_list = data['arr_0']

print(f"Loaded {len(data_list)} data entries")
print(f"Data keys: {data_list[0].keys()}")
# 'vertices', 'faces', 'normals', 'joints', 'bones', 'root_index', 'uuid', 'pc_w_norm', 'joint_names', 'skinning_weights_value',
# 'skinning_weights_row', 'skinning_weights_col', 'skinning_weights_shape'

data = data_list[0]  # check the first data entry

vertices = data['vertices']    # (n_vertex, 3)
faces = data['faces']          # (n_faces, 3)
normals = data['normals']      # (n_vertex, 3)
joints = data['joints']        # (n_joints, 3)
bones = data['bones']          # (n_bones, 2)
pc_w_norm = data['pc_w_norm']  # (8192, 6)

# Extract the sparse skinning weights components
skinning_data = data['skinning_weights_value']
skinning_rows = data['skinning_weights_row']
skinning_cols = data['skinning_weights_col']
skinning_shape = data['skinning_weights_shape']

skinning_sparse = sp.coo_matrix((skinning_data, (skinning_rows, skinning_cols)), shape=skinning_shape)
skinning_weights = skinning_sparse.toarray()  # (n_vertex, n_joints)
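Once `skinning_sparse` is built, per-vertex queries are cheap in CSR form; a sketch that lists the joints influencing vertex 0:

```python
# Sketch: inspect which joints influence a single vertex.
csr = skinning_sparse.tocsr()
row = csr.getrow(0)
for j, w in zip(row.indices, row.data):
    print(f"vertex 0 <- {data['joint_names'][j]} (joint {j}): weight {w:.3f}")
```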
data_utils/read_rig_mesh_from_glb.py
ADDED
@@ -0,0 +1,198 @@
# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Blender script for extracting rig (.txt) and mesh (.obj) from glbs.
This code currently supports GLB files only, but it can be easily modified to load other formats (e.g., FBX, DAE) with minimal changes.
"""

import bpy
import os
import re
import json
import pickle

def get_hierarchy_root_joint(joint):
    """
    Function to find the top parent joint node from the given
    'joint' Blender node (armature bone).
    """
    root_joint = joint
    while root_joint.parent is not None:
        root_joint = root_joint.parent
    return root_joint

def get_meshes_and_armatures():
    """
    Function to get all meshes and armatures in the scene
    """
    default_objects = ['Cube', 'Light', 'Camera', 'Icosphere']
    for obj_name in default_objects:
        if obj_name in bpy.data.objects:
            bpy.data.objects.remove(bpy.data.objects[obj_name], do_unlink=True)

    meshes = [obj for obj in bpy.context.scene.objects if obj.type == 'MESH']
    armatures = [obj for obj in bpy.context.scene.objects if obj.type == 'ARMATURE']
    return meshes, armatures

def get_joint_dict(root):
    """
    Function to create a dictionary of joints from the root joint
    """
    joint_pos = {}
    def traverse_bone(bone):
        joint_pos[bone.name] = {
            'pos': bone.head_local,
            'pa': bone.parent.name if bone.parent else 'None',
            'ch': [child.name for child in bone.children]
        }
        for child in bone.children:
            traverse_bone(child)

    traverse_bone(root)
    return joint_pos

def record_info(root, joint_dict, meshes, mesh_vert_offsets, file_info):
    """
    - root: root joint
    - joint_dict
    - meshes
    - mesh_vert_offsets: for multi-geometry
    - file_info
    """
    skin_records = {}

    def replace_special_characters(name):
        return re.sub(r'\W+', '_', name)

    for key, val in joint_dict.items():
        modified_key = replace_special_characters(key)
        file_info.write(f'joints {modified_key} {val["pos"][0]:.8f} {val["pos"][1]:.8f} {val["pos"][2]:.8f}\n')
    file_info.write(f'root {replace_special_characters(root.name)}\n')

    for mesh_index, mesh in enumerate(meshes):
        vert_offset = mesh_vert_offsets[mesh_index]
        if mesh.type == 'MESH':
            for vtx in mesh.data.vertices:
                weights = {}
                for group in vtx.groups:
                    bone_name = replace_special_characters(mesh.vertex_groups[group.group].name)
                    weights[bone_name] = group.weight

                global_vertex_index = vert_offset + vtx.index
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
+# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Blender script for extracting rigs (.txt) and meshes (.obj) from GLBs.
+This code currently supports GLB files only, but it can be easily modified to load other formats (e.g., FBX, DAE) with minimal changes.
+"""
+
+import bpy
+import os
+import re
+import json
+import pickle
+
+def get_hierarchy_root_joint(joint):
+    """
+    Find the top-most parent joint node of the given
+    'joint' Blender node (armature bone).
+    """
+    root_joint = joint
+    while root_joint.parent is not None:
+        root_joint = root_joint.parent
+    return root_joint
+
+def get_meshes_and_armatures():
+    """
+    Get all meshes and armatures in the scene.
+    """
+    default_objects = ['Cube', 'Light', 'Camera', 'Icosphere']
+    for obj_name in default_objects:
+        if obj_name in bpy.data.objects:
+            bpy.data.objects.remove(bpy.data.objects[obj_name], do_unlink=True)
+
+    meshes = [obj for obj in bpy.context.scene.objects if obj.type == 'MESH']
+    armatures = [obj for obj in bpy.context.scene.objects if obj.type == 'ARMATURE']
+    return meshes, armatures
+
+def get_joint_dict(root):
+    """
+    Create a dictionary of joints from the root joint.
+    """
+    joint_pos = {}
+    def traverse_bone(bone):
+        joint_pos[bone.name] = {
+            'pos': bone.head_local,
+            'pa': bone.parent.name if bone.parent else 'None',
+            'ch': [child.name for child in bone.children]
+        }
+        for child in bone.children:
+            traverse_bone(child)
+
+    traverse_bone(root)
+    return joint_pos
+
+def record_info(root, joint_dict, meshes, mesh_vert_offsets, file_info):
+    """
+    - root: root joint (bone)
+    - joint_dict: joint positions and hierarchy from get_joint_dict
+    - meshes: all mesh objects in the scene
+    - mesh_vert_offsets: per-mesh vertex offsets, for multi-geometry files
+    - file_info: output rig .txt file handle
+    """
+    skin_records = {}
+
+    def replace_special_characters(name):
+        return re.sub(r'\W+', '_', name)
+
+    for key, val in joint_dict.items():
+        modified_key = replace_special_characters(key)
+        file_info.write(f'joints {modified_key} {val["pos"][0]:.8f} {val["pos"][1]:.8f} {val["pos"][2]:.8f}\n')
+    file_info.write(f'root {replace_special_characters(root.name)}\n')
+
+    for mesh_index, mesh in enumerate(meshes):
+        vert_offset = mesh_vert_offsets[mesh_index]
+        if mesh.type == 'MESH':
+            for vtx in mesh.data.vertices:
+                weights = {}
+                for group in vtx.groups:
+                    bone_name = replace_special_characters(mesh.vertex_groups[group.group].name)
+                    weights[bone_name] = group.weight
+
+                global_vertex_index = vert_offset + vtx.index
+
+                skin_record = f"skin {global_vertex_index} " + " ".join(f"{bone} {weight:.4f}" for bone, weight in weights.items())
+
+                # write each global vertex's skin record only once
+                if global_vertex_index not in skin_records:
+                    skin_records[global_vertex_index] = skin_record
+                    file_info.write(skin_record + "\n")
+
+    for key, val in joint_dict.items():
+        if val['pa'] != 'None':
+            parent_name = replace_special_characters(val['pa'])
+            child_name = replace_special_characters(key)
+            file_info.write(f'hier {parent_name} {child_name}\n')
+
+
+def record_obj(meshes, file_obj):
+    vert_offset = 0
+    norm_offset = 0
+    mesh_vert_offsets = []
+
+    for mesh in meshes:
+        mesh_vert_offsets.append(vert_offset)
+        bpy.context.view_layer.objects.active = mesh
+        bpy.ops.object.mode_set(mode='OBJECT')
+
+        # vertices
+        for v in mesh.data.vertices:
+            file_obj.write(f"v {v.co[0]} {v.co[1]} {v.co[2]}\n")
+        file_obj.write("\n")
+
+        # normals
+        for vn in mesh.data.vertices:
+            normal = vn.normal
+            file_obj.write(f"vn {normal[0]} {normal[1]} {normal[2]}\n")
+        file_obj.write("\n")
+
+        # faces (OBJ indices are 1-based and offset across meshes)
+        for poly in mesh.data.polygons:
+            verts = [v + 1 + vert_offset for v in poly.vertices]
+            file_obj.write(f"f {verts[0]}//{verts[0]} {verts[1]}//{verts[1]} {verts[2]}//{verts[2]}\n")
+
+        vert_count = len(mesh.data.vertices)
+        vert_offset += vert_count
+        norm_offset += vert_count
+
+    return mesh_vert_offsets
+
+def process_glb(glb_path, rigs_dir, meshes_dir):
+    base_name = os.path.splitext(os.path.basename(glb_path))[0]
+
+    obj_name = os.path.join(meshes_dir, f'{base_name}.obj')
+    info_name = os.path.join(rigs_dir, f'{base_name}.txt')
+
+    # Skip processing if the rig info file already exists
+    if os.path.exists(info_name):
+        print(f"{info_name} already exists. Skipping...")
+        return
+
+    if os.path.exists(obj_name):
+        print(f"{obj_name} already exists. Skipping...")
+        return
+
+    bpy.ops.wm.read_factory_settings(use_empty=True)
+    bpy.ops.import_scene.gltf(filepath=glb_path)
+
+    meshes, armatures = get_meshes_and_armatures()
+
+    if not armatures:
+        print(f"No armatures found in {glb_path}. Skipping...")
+        return
+
+    root = armatures[0].data.bones[0]
+    root_name = get_hierarchy_root_joint(root)
+    joint_dict = get_joint_dict(root_name)
+
+    # save meshes
+    with open(obj_name, 'w') as file_obj:
+        mesh_vert_offsets = record_obj(meshes, file_obj)
+
+    # save rigs
+    with open(info_name, 'w') as file_info:
+        record_info(root_name, joint_dict, meshes, mesh_vert_offsets, file_info)
+
+    print(f"Processed {glb_path}")
+
+if __name__ == '__main__':
+
+    src_dir = 'glbs'
+    rigs_dir = 'rigs'
+    meshes_dir = 'meshes'
+    # Ensure the output directories exist
+    if not os.path.exists(rigs_dir):
+        os.makedirs(rigs_dir)
+    if not os.path.exists(meshes_dir):
+        os.makedirs(meshes_dir)
+
+    glb_paths = [os.path.join(src_dir, file) for file in os.listdir(src_dir) if file.endswith('.glb')]
+
+    print(len(glb_paths))
+
+    for glb_path in glb_paths:
+        try:
+            process_glb(glb_path, rigs_dir, meshes_dir)
+        except Exception as e:
+            with open('error.txt', 'a') as error_file:
+                error_file.write(f"{glb_path}: {str(e)}\n")
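Since the script above drives bpy, it is meant to run with Blender's bundled Python rather than a standalone interpreter, e.g. blender --background --python data_utils/read_rig_mesh_from_glb.py; the glbs/, rigs/, and meshes/ folders are resolved relative to the working directory, and any file that fails to convert is appended to error.txt instead of aborting the batch.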
data_utils/render_data.py
ADDED
@@ -0,0 +1,61 @@
+# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+import numpy as np
+import cv2
+
+from pyrender_wrapper import PyRenderWrapper
+from data_loader import DataLoader
+
+def main():
+    loader = DataLoader()
+
+    raw_size = (960, 960)
+    renderer = PyRenderWrapper(raw_size)
+
+    output_dir = 'render_results'
+    os.makedirs(output_dir, exist_ok=True)
+
+    rig_path = 'examples/0a59c5ffa4a1476bac6d540b79947f31.txt'
+    mesh_path = rig_path.replace('.txt', '.obj')
+
+    filename = os.path.splitext(os.path.basename(rig_path))[0]
+
+    loader.load_rig_data(rig_path)
+    loader.load_mesh(mesh_path)
+    input_dict = loader.query_mesh_rig()
+
+    # four views, 90 degrees apart around the object
+    angles = [0, np.pi/2, np.pi, 3*np.pi/2]
+
+    bbox_center = loader.mesh.bounding_box.centroid
+    bbox_size = loader.mesh.bounding_box.extents
+    distance = np.max(bbox_size) * 2
+
+    subfolder_path = os.path.join(output_dir, filename)
+
+    os.makedirs(subfolder_path, exist_ok=True)
+
+    for i, angle in enumerate(angles):
+        print(f"Rendering view at {np.degrees(angle)} degrees")
+
+        renderer.set_camera_view(angle, bbox_center, distance)
+        renderer.align_light_to_camera()
+
+        color = renderer.render(input_dict)[0]
+
+        output_filename = f"{filename}_view{i+1}.png"
+        output_filepath = os.path.join(subfolder_path, output_filename)
+        cv2.imwrite(output_filepath, color)
+
+if __name__ == "__main__":
+    main()
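Renders are written to render_results/<name>/<name>_view1.png through _view4.png, one per quarter turn around the bounding-box center; to render another asset, point rig_path at a different rig .txt with a matching .obj beside it.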
data_utils/save_npz.py
ADDED
@@ -0,0 +1,256 @@
+# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+This script shows how we process the meshes and rigs from the input folders and save them into a compressed NPZ file.
+"""
+import os
+import numpy as np
+import glob
+import pickle
+from concurrent.futures import ProcessPoolExecutor
+import skimage.measure
+import trimesh
+import mesh2sdf.core
+import scipy.sparse as sp
+
+def read_obj_file(file_path):
+    vertices = []
+    faces = []
+    normals = []  # Added normals list
+
+    with open(file_path, 'r') as file:
+        for line in file:
+            if line.startswith('v '):
+                parts = line.split()[1:]
+                vertices.append([float(parts[0]), float(parts[1]), float(parts[2])])
+            elif line.startswith('vn '):  # Added reading normals
+                parts = line.split()[1:]
+                normals.append([float(parts[0]), float(parts[1]), float(parts[2])])
+            elif line.startswith('f '):
+                parts = line.split()[1:]
+                # OBJ indices are 1-based; we need 0-based for the npz
+                face = [int(part.split('//')[0]) - 1 for part in parts]
+                faces.append(face)
+
+    return np.array(vertices), np.array(faces), np.array(normals)
+
+def read_rig_file(file_path):
+    """
+    Read a rig from a txt file; our format is the same as RigNet:
+        joints joint_name x y z
+        root root_joint_name
+        skin vertex_idx joint_name weight joint_name weight ...
+        hier parent_joint_name child_joint_name
+    """
+    joints = []
+    bones = []
+    joint_names = []
+
+    joint_mapping = {}
+    joint_index = 0
+
+    skinning_data = {}  # Dictionary to store vertex index -> [(joint_idx, weight), ...]
+
+    with open(file_path, 'r') as file:
+        lines = file.readlines()
+
+    for line in lines:
+        parts = line.split()
+        if line.startswith('joints'):
+            name = parts[1]
+            position = [float(parts[2]), float(parts[3]), float(parts[4])]
+            joints.append(position)
+            joint_names.append(name)
+            joint_mapping[name] = joint_index
+            joint_index += 1
+        elif line.startswith('hier'):
+            parent_joint = joint_mapping[parts[1]]
+            child_joint = joint_mapping[parts[2]]
+            bones.append([parent_joint, child_joint])
+        elif line.startswith('root'):
+            root = joint_mapping[parts[1]]
+        elif line.startswith('skin'):
+            vertex_idx = int(parts[1])
+
+            if vertex_idx not in skinning_data:
+                skinning_data[vertex_idx] = []
+
+            for i in range(2, len(parts), 2):
+                if i+1 < len(parts):
+                    joint_name = parts[i]
+                    weight = float(parts[i+1])
+
+                    if joint_name in joint_mapping:
+                        joint_idx = joint_mapping[joint_name]
+                        skinning_data[vertex_idx].append((joint_idx, weight))
+
+    return np.array(joints), np.array(bones), root, joint_names, skinning_data
+
+def convert_to_sparse_skinning(skinning_data, num_vertices, num_joints):
+    """Convert skinning weights to sparse matrix format."""
+    rows = []
+    cols = []
+    data = []
+
+    for vertex_idx, weights in skinning_data.items():
+        for joint_idx, weight in weights:
+            rows.append(vertex_idx)
+            cols.append(joint_idx)
+            data.append(weight)
+
+    sparse_skinning = sp.coo_matrix((data, (rows, cols)), shape=(num_vertices, num_joints))
+
+    # Return as a tuple of arrays which can be serialized
+    return (sparse_skinning.data, sparse_skinning.row, sparse_skinning.col, sparse_skinning.shape)
+
+def normalize_to_unit_cube(vertices, normals=None, scale_factor=1.0):
+    min_coords = vertices.min(axis=0)
+    max_coords = vertices.max(axis=0)
+    center = (max_coords + min_coords) / 2.0
+
+    vertices -= center
+    scale = 1.0 / np.abs(vertices).max() * scale_factor
+    vertices *= scale
+
+    if normals is not None:
+        # Normalize each normal vector to unit length
+        norms = np.linalg.norm(normals, axis=1, keepdims=True)
+        normals = normals / (norms + 1e-8)
+
+        return vertices, normals, center, scale
+    else:
+        return vertices, center, scale
+
+def normalize_vertices(vertices, scale=0.9):
+    bbmin, bbmax = vertices.min(0), vertices.max(0)
+    center = (bbmin + bbmax) * 0.5
+    scale = 2.0 * scale / (bbmax - bbmin).max()
+    vertices = (vertices - center) * scale
+    return vertices, center, scale
+
+def export_to_watertight(normalized_mesh, octree_depth: int = 7):
+    """
+    Convert a non-watertight mesh to a watertight one.
+
+    Args:
+        normalized_mesh (trimesh.Trimesh): input mesh
+        octree_depth (int): the SDF grid resolution is 2 ** octree_depth
+
+    Returns:
+        mesh (trimesh.Trimesh): watertight mesh
+
+    """
+    size = 2 ** octree_depth
+    level = 2 / size
+
+    scaled_vertices, to_orig_center, to_orig_scale = normalize_vertices(normalized_mesh.vertices)
+
+    sdf = mesh2sdf.core.compute(scaled_vertices, normalized_mesh.faces, size=size)
+
+    vertices, faces, normals, _ = skimage.measure.marching_cubes(np.abs(sdf), level)
+
+    # watertight mesh
+    vertices = vertices / size * 2 - 1  # -1 to 1
+    vertices = vertices / to_orig_scale + to_orig_center
+    mesh = trimesh.Trimesh(vertices, faces, normals=normals)
+
+    return mesh
+
+def process_mesh_to_pc(mesh, marching_cubes=True, sample_num=8192):
+    if marching_cubes:
+        mesh = export_to_watertight(mesh)
+    return_mesh = mesh
+    points, face_idx = mesh.sample(sample_num, return_index=True)
+    points, _, _ = normalize_to_unit_cube(points, scale_factor=0.9995)
+    normals = mesh.face_normals[face_idx]
+
+    pc_normal = np.concatenate([points, normals], axis=-1, dtype=np.float16)
+    return pc_normal, return_mesh
+
+def process_single_file(args):
+    mesh_file, rig_file = args
+    mesh_name = os.path.basename(mesh_file).split('.')[0]
+    rig_name = os.path.basename(rig_file).split('.')[0]
+
+    if mesh_name != rig_name:
+        print(f"Skipping files {mesh_file} and {rig_file} because their names do not match.")
+        return None
+
+    vertices, faces, normals = read_obj_file(mesh_file)
+
+    joints, bones, root, joint_names, skinning_data = read_rig_file(rig_file)
+
+    # Normalize the mesh to the unit cube centered at the origin
+    vertices, normals, center, scale = normalize_to_unit_cube(vertices, normals, scale_factor=0.5)
+
+    # Apply the same transformation to joints
+    joints -= center
+    joints *= scale
+
+    # Create a trimesh object for processing
+    mesh = trimesh.Trimesh(vertices=vertices, faces=faces)
+
+    # Process into a point cloud with normals
+    pc_normal, _ = process_mesh_to_pc(mesh)
+
+    # Convert skinning data to sparse format
+    sparse_skinning = convert_to_sparse_skinning(skinning_data, len(vertices), len(joints))
+
+    return {
+        'vertices': vertices,
+        'faces': faces,
+        'normals': normals,
+        'joints': joints,
+        'bones': bones,
+        'root_index': root,
+        'uuid': mesh_name,
+        'pc_w_norm': pc_normal,
+        'joint_names': joint_names,
+        'skinning_weights_value': sparse_skinning[0],  # values
+        'skinning_weights_row': sparse_skinning[1],    # row indices (key names match read_npz.py)
+        'skinning_weights_col': sparse_skinning[2],    # column indices
+        'skinning_weights_shape': sparse_skinning[3]   # shape of matrix
+    }
+
+def process_files(mesh_folder, rig_folder, output_file, num_workers=8):
+    file_pairs = []
+
+    for root, _, files in os.walk(rig_folder):
+        for file in files:
+            if file.endswith('.txt'):
+                rig_file = os.path.join(root, file)
+                obj_base_name = os.path.splitext(file)[0]
+                mesh_file = os.path.join(mesh_folder, obj_base_name + '.obj')
+                if os.path.exists(mesh_file):
+                    file_pairs.append((mesh_file, rig_file))
+                else:
+                    print(f"Mesh file not found: {mesh_file}")
+
+    with ProcessPoolExecutor(max_workers=num_workers) as executor:
+        data_list = list(executor.map(process_single_file, file_pairs))
+
+    data_list = [data for data in data_list if data is not None]
+
+    # np.savez_compressed pickles object arrays automatically (it has no
+    # allow_pickle argument); read_npz.py loads this back via data['arr_0']
+    np.savez_compressed(output_file, data_list)
+
+def main():
+    # Example usage
+    mesh_folder = 'meshes/'
+    rig_folder = 'rigs/'
+    output_file = 'results.npz'
+
+    process_files(mesh_folder, rig_folder, output_file)
+
+if __name__ == "__main__":
+    main()
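For reference, read_rig_file above parses RigNet-style text like this minimal, hypothetical two-joint rig (names and coordinates are made up for illustration):

joints pelvis 0.00000000 0.00000000 0.00000000
joints spine 0.00000000 0.25000000 0.00000000
root pelvis
skin 0 pelvis 0.7500 spine 0.2500
hier pelvis spine

Each skin line carries one vertex index followed by (joint name, weight) pairs, which convert_to_sparse_skinning then flattens into the COO triplets stored in the NPZ.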
data_utils/update_npz_rm_issue_data.py
ADDED
@@ -0,0 +1,59 @@
+# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import numpy as np
+import os
+
+def filter_npz_by_filenames(npz_path, txt_path, output_path):
+
+    data_list = np.load(npz_path, allow_pickle=True)['arr_0']
+
+    with open(txt_path, 'r') as f:
+        exclude_filenames = set(line.strip() for line in f if line.strip())
+
+    # Filter the data list
+    filtered_data = []
+    excluded_count = 0
+
+    for item in data_list:
+
+        filename = item['uuid']
+
+        if filename in exclude_filenames:
+            excluded_count += 1
+            print(filename)
+        else:
+            filtered_data.append(item)
+
+    # Save the filtered data
+    kept_count = len(filtered_data)
+    total_count = len(data_list)
+    print(f"Original items: {total_count}")
+    print(f"Kept items: {kept_count}")
+    print(f"Removed items: {excluded_count}")
+
+    print("Saving filtered data")
+    np.savez_compressed(output_path, filtered_data)  # object arrays are pickled automatically
+
+def main():
+    issue_list = "data_utils/issue_data_list.txt"   # Change this to your text file path
+    npz_path_train = "articulation_xlv2_train.npz"  # Change this to your NPZ file path
+    output_path_train = "articulation_xlv2_train_update.npz"
+    npz_path_test = "articulation_xlv2_test.npz"    # Change this to your NPZ file path
+    output_path_test = "articulation_xlv2_test_update.npz"
+
+    filter_npz_by_filenames(npz_path_train, issue_list, output_path_train)
+    filter_npz_by_filenames(npz_path_test, issue_list, output_path_test)
+
+if __name__ == "__main__":
+    main()
demo.py
ADDED
@@ -0,0 +1,214 @@
+# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+import torch
+import trimesh
+import argparse
+import numpy as np
+
+from tqdm import tqdm
+from trimesh import Scene
+
+from accelerate import Accelerator
+from accelerate.utils import set_seed
+from accelerate.utils import DistributedDataParallelKwargs
+
+from skeleton_models.skeletongen import SkeletonGPT
+from data_utils.save_npz import normalize_to_unit_cube
+from utils.mesh_to_pc import MeshProcessor
+from utils.save_utils import save_mesh, pred_joints_and_bones, save_skeleton_to_txt, save_args, \
+    merge_duplicate_joints_and_fix_bones, save_skeleton_obj, render_mesh_with_skeleton
+
+class Dataset:
+    def __init__(self, input_list, input_pc_num=8192, apply_marching_cubes=True, octree_depth=7, output_dir=None):
+        super().__init__()
+        self.data = []
+        self.output_dir = output_dir
+
+        mesh_list = []
+        for input_path in input_list:
+            ext = os.path.splitext(input_path)[1].lower()
+            if ext in ['.ply', '.stl', '.obj']:
+                cur_data = trimesh.load(input_path, force='mesh')
+                mesh_list.append(cur_data)
+            else:
+                print(f"Unsupported file type: {ext}")
+        if apply_marching_cubes:
+            print("Applying Marching Cubes before sampling the point clouds; this takes some time...")
+        pc_list = MeshProcessor.convert_meshes_to_point_clouds(mesh_list, input_pc_num, apply_marching_cubes=apply_marching_cubes, octree_depth=octree_depth)
+        for input_path, cur_data, mesh in zip(input_list, pc_list, mesh_list):
+            self.data.append({'pc_normal': cur_data, 'faces': mesh.faces, 'vertices': mesh.vertices, 'file_name': os.path.splitext(os.path.basename(input_path))[0]})
+        print(f"dataset total data samples: {len(self.data)}")
+
+    def __len__(self):
+        return len(self.data)
+
+    def __getitem__(self, idx):
+        data_dict = {}
+        data_dict['pc_normal'] = self.data[idx]['pc_normal']
+        # normalize point-cloud coordinates
+        pc_coor = data_dict['pc_normal'][:, :3]
+        normals = data_dict['pc_normal'][:, 3:]
+        pc_coor, center, scale = normalize_to_unit_cube(pc_coor, scale_factor=0.9995)
+
+        data_dict['file_name'] = self.data[idx]['file_name']
+        pc_coor = pc_coor.astype(np.float32)
+        normals = normals.astype(np.float32)
+
+        point_cloud = trimesh.PointCloud(pc_coor)
+        point_cloud.metadata['normals'] = normals
+
+        try:
+            point_cloud.export(os.path.join(self.output_dir, f"{data_dict['file_name']}.ply"))
+        except Exception as e:
+            print(f"Failed to save point cloud: {e}")
+
+        assert (np.linalg.norm(normals, axis=-1) > 0.99).all(), "normals should be unit vectors, something wrong"
+        data_dict['pc_normal'] = np.concatenate([pc_coor, normals], axis=-1, dtype=np.float16)
+
+        vertices = self.data[idx]['vertices']
+        faces = self.data[idx]['faces']
+        bounds = np.array([pc_coor.min(axis=0), pc_coor.max(axis=0)])
+        pc_center = (bounds[0] + bounds[1])[None, :] / 2
+        pc_scale = ((bounds[1] - bounds[0]).max() + 1e-5)
+        data_dict['transform_params'] = torch.tensor([
+            center[0], center[1], center[2],
+            scale,
+            pc_center[0][0], pc_center[0][1], pc_center[0][2],
+            pc_scale
+        ], dtype=torch.float32)
+        data_dict['vertices'] = vertices
+        data_dict['faces'] = faces
+        return data_dict
+
+def get_args():
+    parser = argparse.ArgumentParser("SkeletonGPT", add_help=False)
+
+    parser.add_argument("--input_pc_num", default=8192, type=int)
+    parser.add_argument("--num_beams", default=1, type=int)
+    parser.add_argument('--input_dir', default=None, type=str, help="input mesh directory")
+    parser.add_argument('--input_path', default=None, type=str, help="input mesh path")
+    parser.add_argument("--output_dir", default="outputs", type=str)
+    parser.add_argument('--llm', default="facebook/opt-350m", type=str, help="The LLM backend")
+    parser.add_argument("--pad_id", default=-1, type=int, help="padding id")
+    parser.add_argument("--n_discrete_size", default=128, type=int, help="discretized 3D space")
+    parser.add_argument("--n_max_bones", default=100, type=int, help="max number of bones")
+    parser.add_argument('--dataset_path', default="combine_256_updated", type=str, help="data path")
+    parser.add_argument("--seed", default=0, type=int)
+    parser.add_argument("--precision", default="fp16", type=str)
+    parser.add_argument("--batchsize_per_gpu", default=1, type=int)
+    parser.add_argument('--pretrained_weights', default=None, type=str)
+    parser.add_argument('--save_name', default="infer_results", type=str)
+    parser.add_argument("--save_render", default=False, action="store_true", help="save rendering results of mesh with skel")
+    parser.add_argument("--apply_marching_cubes", default=False, action="store_true")
+    parser.add_argument("--octree_depth", default=7, type=int)
+    parser.add_argument("--hier_order", default=False, action="store_true")
+
+    args = parser.parse_args()
+    return args
+
+if __name__ == "__main__":
+    args = get_args()
+
+    output_dir = f'{args.output_dir}/{args.save_name}'
+    os.makedirs(output_dir, exist_ok=True)
+    save_args(args, output_dir)
+
+    kwargs = DistributedDataParallelKwargs(find_unused_parameters=True)
+    accelerator = Accelerator(
+        kwargs_handlers=[kwargs],
+        mixed_precision=args.precision,
+    )
+
+    model = SkeletonGPT(args).cuda()
+
+    if args.pretrained_weights is not None:
+        pkg = torch.load(args.pretrained_weights, map_location=torch.device("cpu"))
+        model.load_state_dict(pkg["model"])
+    else:
+        raise ValueError("Pretrained weights must be provided.")
+    model.eval()
+    set_seed(args.seed)
+
+    # create dataset
+    if args.input_dir is not None:
+        input_list = sorted(os.listdir(args.input_dir))
+        input_list = [os.path.join(args.input_dir, x) for x in input_list if x.endswith('.ply') or x.endswith('.obj') or x.endswith('.stl')]
+        dataset = Dataset(input_list, args.input_pc_num, args.apply_marching_cubes, args.octree_depth, output_dir)
+    elif args.input_path is not None:
+        dataset = Dataset([args.input_path], args.input_pc_num, args.apply_marching_cubes, args.octree_depth, output_dir)
+    else:
+        raise ValueError("input_dir or input_path must be provided.")
+
+    dataloader = torch.utils.data.DataLoader(
+        dataset,
+        batch_size=1,
+        drop_last=False,
+        shuffle=False,
+    )
+
+    dataloader, model = accelerator.prepare(dataloader, model)
+
+    for curr_iter, batch_data_label in tqdm(enumerate(dataloader), total=len(dataloader)):
+        with accelerator.autocast():
+            pred_bone_coords = model.generate(batch_data_label)
+
+        # determine the output file names
+        file_name = os.path.basename(batch_data_label['file_name'][0])
+        pred_skel_filename = os.path.join(output_dir, f'{file_name}_skel.obj')
+        pred_rig_filename = os.path.join(output_dir, f"{file_name}_pred.txt")
+        mesh_filename = os.path.join(output_dir, f"{file_name}_mesh.obj")
+
+        transform_params = batch_data_label['transform_params'][0].cpu().numpy()
+        trans = transform_params[:3]
+        scale = transform_params[3]
+        pc_trans = transform_params[4:7]
+        pc_scale = transform_params[7]
+        vertices = batch_data_label['vertices'][0].cpu().numpy()
+        faces = batch_data_label['faces'][0].cpu().numpy()
+
+        skeleton = pred_bone_coords[0].cpu().numpy()
+        pred_joints, pred_bones = pred_joints_and_bones(skeleton.squeeze())
+
+        # Post-process: merge duplicate or nearby joints and deduplicate bones.
+        if args.hier_order:
+            pred_root_index = pred_bones[0][0]
+            pred_joints, pred_bones, pred_root_index = merge_duplicate_joints_and_fix_bones(pred_joints, pred_bones, root_index=pred_root_index)
+        else:
+            pred_joints, pred_bones = merge_duplicate_joints_and_fix_bones(pred_joints, pred_bones)
+            pred_root_index = None
+
+        # when saving the rig to txt, denormalize the skeleton back to the scale of the input mesh
+        pred_joints_denorm = pred_joints * pc_scale + pc_trans   # first align with the point cloud
+        pred_joints_denorm = pred_joints_denorm / scale + trans  # then align with the original mesh
+
+        save_skeleton_to_txt(pred_joints_denorm, pred_bones, pred_root_index, args.hier_order, vertices, pred_rig_filename)
+
+        # save skeletons
+        if args.hier_order:
+            save_skeleton_obj(pred_joints, pred_bones, pred_skel_filename, pred_root_index, use_cone=True)
+        else:
+            save_skeleton_obj(pred_joints, pred_bones, pred_skel_filename, use_cone=False)
+
+        # when saving the mesh and rendering, use normalized vertices in (-0.5, 0.5)
+        vertices_norm = (vertices - trans) * scale
+        vertices_norm = (vertices_norm - pc_trans) / pc_scale
+        save_mesh(vertices_norm, faces, mesh_filename)
+
+        # render mesh with skeleton
+        if args.save_render:
+            if args.hier_order:
+                render_mesh_with_skeleton(pred_joints, pred_bones, vertices_norm, faces, output_dir, file_name, prefix='pred', root_idx=pred_root_index)
+            else:
+                render_mesh_with_skeleton(pred_joints, pred_bones, vertices_norm, faces, output_dir, file_name, prefix='pred')
demo.sh
ADDED
@@ -0,0 +1,4 @@
+CUDA_VISIBLE_DEVICES=0 python demo.py --input_dir ./examples \
+    --pretrained_weights skeleton_ckpt/checkpoint_trainonv2_hier.pth \
+    --save_name infer_results_demo_hier --input_pc_num 8192 \
+    --save_render --apply_marching_cubes --hier_order
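The script expects the checkpoint to already sit under skeleton_ckpt/ (see download.py below); to use the spatial-ordering model instead, point --pretrained_weights at skeleton_ckpt/checkpoint_trainonv2_spatial.pth and drop --hier_order.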
download.py
ADDED
@@ -0,0 +1,19 @@
+from huggingface_hub import hf_hub_download
+
+file_path = hf_hub_download(
+    repo_id="Maikou/Michelangelo",
+    filename="checkpoints/aligned_shape_latents/shapevae-256.ckpt",
+    local_dir="third_partys/Michelangelo"
+)
+
+file_path = hf_hub_download(
+    repo_id="Seed3D/MagicArticulate",
+    filename="skeleton_ckpt/checkpoint_trainonv2_hier.pth",
+    local_dir=""
+)
+
+file_path = hf_hub_download(
+    repo_id="Seed3D/MagicArticulate",
+    filename="skeleton_ckpt/checkpoint_trainonv2_spatial.pth",
+    local_dir=""
+)
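With local_dir="" the two skeleton checkpoints should land under skeleton_ckpt/ in the current working directory, matching the path demo.sh passes to --pretrained_weights; the Michelangelo shape VAE is likewise placed under third_partys/Michelangelo/checkpoints/aligned_shape_latents/.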
requirements.txt
ADDED
@@ -0,0 +1,37 @@
+#trimesh==4.2.3
+#accelerate==0.28.0
+#mesh2sdf==1.1.0
+#transformers==4.39.3
+#numpy==1.26.4
+#pyrender==0.1.45
+#tqdm
+#opencv-python==4.9.0.80
+#omegaconf==2.3.0
+#einops==0.7.0
+##======= HF===================
+
+# MagicArticulate Requirements for Gradio Demo
+# Compatible with CUDA 11.8 and Python 3.10
+--extra-index-url https://download.pytorch.org/whl/cu118
+torch==2.1.1
+torchvision==0.16.1
+torchaudio==2.1.1
+
+# Gradio for web interface
+gradio==4.44.0
+
+# 3D mesh processing
+trimesh==4.4.3
+accelerate==0.28.0
+mesh2sdf==1.1.0
+transformers==4.39.3
+numpy==1.26.4
+pyrender==0.1.45
+tqdm
+opencv-python==4.9.0.80
+omegaconf==2.3.0
+einops==0.7.0
+
+flash-attn==2.6.3
+huggingface_hub
+gradio-client>=1.0.0
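The commented block at the top preserves the earlier, pre-demo pins; the flash-attn pin backs the flash_attention_2 attention path that skeleton_models/shape_opt.py (below) selects when config._attn_implementation == "flash_attention_2".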
skeleton_models/shape_opt.py
ADDED
@@ -0,0 +1,406 @@
+# Modified from https://github.com/buaacyw/MeshAnything
+from transformers import AutoModelForCausalLM, AutoConfig, OPTConfig
+from transformers.models.opt.modeling_opt import OPTForCausalLM, OPTModel, OPTDecoder, OPTLearnedPositionalEmbedding, OPTDecoderLayer
+from typing import List, Optional, Tuple, Union
+from transformers.modeling_outputs import (
+    CausalLMOutputWithPast,
+)
+import torch
+from torch import nn
+from torch.nn import CrossEntropyLoss
+from transformers.utils import replace_return_docstrings
+from transformers.modeling_outputs import BaseModelOutputWithPast
+
+class ShapeOPTConfig(OPTConfig):
+    model_type = "shape_opt"
+
+class ShapeOPT(OPTForCausalLM):
+    config_class = ShapeOPTConfig
+    def __init__(self, config: ShapeOPTConfig):
+        super(OPTForCausalLM, self).__init__(config)
+        self.model = ShapeOPTModel(config)
+        self.lm_head = nn.Linear(config.word_embed_proj_dim, config.vocab_size, bias=False)
+        # Initialize weights and apply final processing
+        self.post_init()
+
+    @replace_return_docstrings(output_type=CausalLMOutputWithPast, config_class="OPTConfig")
+    def forward(
+        self,
+        input_ids: torch.LongTensor = None,
+        bone_ids: torch.LongTensor = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        head_mask: Optional[torch.Tensor] = None,
+        past_key_values: Optional[List[torch.FloatTensor]] = None,
+        inputs_embeds: Optional[torch.FloatTensor] = None,
+        labels: Optional[torch.LongTensor] = None,
+        use_cache: Optional[bool] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+    ) -> Union[Tuple, CausalLMOutputWithPast]:
+        r"""
+        Args:
+            input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
+                Indices of input sequence tokens in the vocabulary. Padding will be ignored by default should you
+                provide it.
+
+                Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
+                [`PreTrainedTokenizer.__call__`] for details.
+
+                [What are input IDs?](../glossary#input-ids)
+            attention_mask (`torch.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
+                Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:
+
+                - 1 for tokens that are **not masked**,
+                - 0 for tokens that are **masked**.
+
+                [What are attention masks?](../glossary#attention-mask)
+            head_mask (`torch.Tensor` of shape `(num_hidden_layers, num_attention_heads)`, *optional*):
+                Mask to nullify selected heads of the attention modules. Mask values selected in `[0, 1]`:
+
+                - 1 indicates the head is **not masked**,
+                - 0 indicates the head is **masked**.
+
+            past_key_values (`tuple(tuple(torch.FloatTensor))`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`):
+                Tuple of `tuple(torch.FloatTensor)` of length `config.n_layers`, with each tuple having 2 tensors of
+                shape `(batch_size, num_heads, sequence_length, embed_size_per_head)`) and 2 additional tensors of
+                shape `(batch_size, num_heads, encoder_sequence_length, embed_size_per_head)`. The two additional
+                tensors are only required when the model is used as a decoder in a Sequence to Sequence model.
+
+                Contains pre-computed hidden-states (key and values in the self-attention blocks and in the
+                cross-attention blocks) that can be used (see `past_key_values` input) to speed up sequential decoding.
+
+                If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those
+                that don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of
+                all `decoder_input_ids` of shape `(batch_size, sequence_length)`.
+            inputs_embeds (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
+                Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation.
+                This is useful if you want more control over how to convert `input_ids` indices into associated vectors
+                than the model's internal embedding lookup matrix.
+            labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
+                Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
+                config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
+                (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.
+            use_cache (`bool`, *optional*):
+                If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding
+                (see `past_key_values`).
+            output_attentions (`bool`, *optional*):
+                Whether or not to return the attentions tensors of all attention layers. See `attentions` under
+                returned tensors for more detail.
+            output_hidden_states (`bool`, *optional*):
+                Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors
+                for more detail.
+            return_dict (`bool`, *optional*):
+                Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
+
+        Returns:
+
+        Example:
+
+        ```python
+        >>> from transformers import AutoTokenizer, OPTForCausalLM
+
+        >>> model = OPTForCausalLM.from_pretrained("facebook/opt-350m")
+        >>> tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m")
+
+        >>> prompt = "Hey, are you conscious? Can you talk to me?"
+        >>> inputs = tokenizer(prompt, return_tensors="pt")
+
+        >>> # Generate
+        >>> generate_ids = model.generate(inputs.input_ids, max_length=30)
+        >>> tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
+        "Hey, are you conscious? Can you talk to me?\nI'm not conscious. I'm just a little bit of a weirdo."
+        ```"""
+        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
+        output_hidden_states = (
+            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
+        )
+        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
+
+        # decoder outputs consists of (dec_features, layer_state, dec_hidden, dec_attn)
+        outputs = self.model.decoder(
+            input_ids = input_ids,
+            bone_ids = bone_ids,
+            attention_mask=attention_mask,
+            head_mask=head_mask,
+            past_key_values=past_key_values,
+            inputs_embeds=inputs_embeds,
+            use_cache=use_cache,
+            output_attentions=output_attentions,
+            output_hidden_states=output_hidden_states,
+            return_dict=return_dict,
+        )
+
+        logits = self.lm_head(outputs[0]).contiguous()
+
+        loss = None
+        if labels is not None:
+            # move labels to correct device to enable model parallelism
+            labels = labels.to(logits.device)
+            # Shift so that tokens < n predict n
+            shift_logits = logits[..., :-1, :].contiguous()
+            shift_labels = labels[..., 1:].contiguous()
+            # Flatten the tokens
+            loss_fct = CrossEntropyLoss()
+            loss = loss_fct(shift_logits.view(-1, self.config.vocab_size), shift_labels.view(-1))
+
+        if not return_dict:
+            output = (logits,) + outputs[1:]
+            return (loss,) + output if loss is not None else output
+
+        return CausalLMOutputWithPast(
+            loss=loss,
+            logits=logits,
+            past_key_values=outputs.past_key_values,
+            hidden_states=outputs.hidden_states,
+            attentions=outputs.attentions,
+        )
+
+class ShapeOPTModel(OPTModel):
+    config_class = ShapeOPTConfig
+    def __init__(self, config: ShapeOPTConfig):
+        super(OPTModel, self).__init__(config)
+        self.decoder = ShapeOPTDecoder(config)
+        # Initialize weights and apply final processing
+        self.post_init()
+
+class ShapeOPTDecoder(OPTDecoder):
+    config_class = ShapeOPTConfig
+    def __init__(self, config: ShapeOPTConfig):
+        super(OPTDecoder, self).__init__(config)
+        self.config = config
+        self.dropout = config.dropout
+        self.layerdrop = config.layerdrop
+        self.padding_idx = config.pad_token_id
+        self.vocab_size = config.vocab_size
+        assert config.word_embed_proj_dim == config.hidden_size
+        self.embed_tokens = nn.Embedding(config.vocab_size, config.word_embed_proj_dim, self.padding_idx)
+        self.hidden_size = config.hidden_size
+        self.word_embed_proj_dim = config.word_embed_proj_dim
+        self.n_discrete_size = config.n_discrete_size
+
+        self.embed_positions = OPTLearnedPositionalEmbedding(config.max_position_embeddings, config.hidden_size)
+        self.token_embed_positions = OPTBonePositionalEmbedding(config.bone_per_token+3, config.word_embed_proj_dim)
+
+        self.bone_per_token = config.bone_per_token
+        self.cond_length = config.cond_length
+        self.cond_embed = nn.Embedding(2, config.word_embed_proj_dim)
+        # Note that the only purpose of `config._remove_final_layer_norm` is to keep backward compatibility
+        # with checkpoints that have been fine-tuned before transformers v4.20.1
+        # see https://github.com/facebookresearch/metaseq/pull/164
+        if config.do_layer_norm_before and not config._remove_final_layer_norm:
+            self.final_layer_norm = nn.LayerNorm(
+                config.hidden_size, elementwise_affine=config.layer_norm_elementwise_affine
+            )
+        else:
+            self.final_layer_norm = None
+
+        self.layers = nn.ModuleList([OPTDecoderLayer(config) for _ in range(config.num_hidden_layers)])
+        self._use_flash_attention_2 = config._attn_implementation == "flash_attention_2"
+
+        self.gradient_checkpointing = False
+        # Initialize weights and apply final processing
+        self.post_init()
+
+    def forward(
+        self,
+        input_ids: torch.LongTensor = None,
+        bone_ids: torch.LongTensor = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        head_mask: Optional[torch.Tensor] = None,
+        past_key_values: Optional[List[torch.FloatTensor]] = None,
+        inputs_embeds: Optional[torch.FloatTensor] = None,
+        use_cache: Optional[bool] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+    ) -> Union[Tuple, BaseModelOutputWithPast]:
+        r"""
+        Args:
+            input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
+                Indices of input sequence tokens in the vocabulary. Padding will be ignored by default should you
+                provide it.
+
+                Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
+                [`PreTrainedTokenizer.__call__`] for details.
+
+                [What are input IDs?](../glossary#input-ids)
+            attention_mask (`torch.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
+                Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:
+
+                - 1 for tokens that are **not masked**,
+                - 0 for tokens that are **masked**.
+
+                [What are attention masks?](../glossary#attention-mask)
+            head_mask (`torch.Tensor` of shape `(num_hidden_layers, num_attention_heads)`, *optional*):
+                Mask to nullify selected heads of the attention modules. Mask values selected in `[0, 1]`:
+
+                - 1 indicates the head is **not masked**,
+                - 0 indicates the head is **masked**.
+
+            past_key_values (`tuple(tuple(torch.FloatTensor))`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`):
+                Tuple of `tuple(torch.FloatTensor)` of length `config.n_layers`, with each tuple having 2 tensors of
+                shape `(batch_size, num_heads, sequence_length, embed_size_per_head)`) and 2 additional tensors of
+                shape `(batch_size, num_heads, encoder_sequence_length, embed_size_per_head)`.
+
+                Contains pre-computed hidden-states (key and values in the self-attention blocks and in the
+                cross-attention blocks) that can be used (see `past_key_values` input) to speed up sequential decoding.
+
+                If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those
+                that don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of
+                all `decoder_input_ids` of shape `(batch_size, sequence_length)`.
+
+            inputs_embeds (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
+                Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation.
+                This is useful if you want more control over how to convert `input_ids` indices into associated vectors
+                than the model's internal embedding lookup matrix.
+            output_attentions (`bool`, *optional*):
+                Whether or not to return the attentions tensors of all attention layers. See `attentions` under
+                returned tensors for more detail.
+            output_hidden_states (`bool`, *optional*):
+                Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors
+                for more detail.
+            return_dict (`bool`, *optional*):
+                Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
+        """
+        # OPT Decoder
+        # print("used my Trans")
+        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
+        output_hidden_states = (
+            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
+        )
+        use_cache = use_cache if use_cache is not None else self.config.use_cache
+
+        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
+        # Transformer Decoder
+        if input_ids is not None and inputs_embeds is not None:  # when training
+            pass
+        elif input_ids is not None:  # when inference
+            assert not self.training
+            input_shape = input_ids.size()
+            input_ids = input_ids.view(-1, input_shape[-1])
+            inputs_embeds = self.embed_tokens(input_ids)
+            bone_embeds = self.token_embed_positions(attention_mask[:, self.cond_length:], bone_ids, input_ids,
+                                                     self.bone_per_token)
+            inputs_embeds += bone_embeds
+            cond_embed_query = torch.ones((inputs_embeds.shape[0], inputs_embeds.shape[1]), device=inputs_embeds.device,
+                                          dtype=inputs_embeds.dtype).long()
+            inputs_embeds = inputs_embeds + self.cond_embed(cond_embed_query)
+
+        elif inputs_embeds is not None:  # when generating the first skeleton token
+            assert not self.training
+            total_length = inputs_embeds.shape[1]
+            cond_embed_query = torch.zeros((inputs_embeds.shape[0], total_length), device=inputs_embeds.device,
+                                           dtype=inputs_embeds.dtype).long()
+            inputs_embeds = inputs_embeds + self.cond_embed(cond_embed_query)
+        else:
+            raise ValueError("You have to specify either decoder_input_ids or decoder_inputs_embeds")
+
+        past_key_values_length = past_key_values[0][0].shape[2] if past_key_values is not None else 0
+        # embed positions
+        if self._use_flash_attention_2:
+            # 2d mask is passed through the layers
+            assert attention_mask is not None
+            causal_attention_mask = attention_mask if 0 in attention_mask else None
|
| 304 |
+
else:
|
| 305 |
+
raise ValueError("Only flash_attention_2 is supported")
|
| 306 |
+
|
| 307 |
+
pos_embeds = self.embed_positions(attention_mask, past_key_values_length)
|
| 308 |
+
|
| 309 |
+
hidden_states = inputs_embeds + pos_embeds
|
| 310 |
+
|
| 311 |
+
# decoder layers
|
| 312 |
+
all_hidden_states = () if output_hidden_states else None
|
| 313 |
+
all_self_attns = () if output_attentions else None
|
| 314 |
+
next_decoder_cache = () if use_cache else None
|
| 315 |
+
|
| 316 |
+
# check if head_mask has a correct number of layers specified if desired
|
| 317 |
+
for attn_mask, mask_name in zip([head_mask], ["head_mask"]):
|
| 318 |
+
if attn_mask is not None:
|
| 319 |
+
if attn_mask.size()[0] != (len(self.layers)):
|
| 320 |
+
raise ValueError(
|
| 321 |
+
f"The `{mask_name}` should be specified for {len(self.layers)} layers, but it is for"
|
| 322 |
+
f" {head_mask.size()[0]}."
|
| 323 |
+
)
|
| 324 |
+
|
| 325 |
+
for idx, decoder_layer in enumerate(self.layers):
|
| 326 |
+
# add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
|
| 327 |
+
if output_hidden_states:
|
| 328 |
+
all_hidden_states += (hidden_states,)
|
| 329 |
+
|
| 330 |
+
if self.training:
|
| 331 |
+
dropout_probability = torch.rand([])
|
| 332 |
+
if dropout_probability < self.layerdrop:
|
| 333 |
+
continue
|
| 334 |
+
|
| 335 |
+
past_key_value = past_key_values[idx] if past_key_values is not None else None
|
| 336 |
+
|
| 337 |
+
if self.gradient_checkpointing and self.training:
|
| 338 |
+
layer_outputs = self._gradient_checkpointing_func(
|
| 339 |
+
decoder_layer.__call__,
|
| 340 |
+
hidden_states,
|
| 341 |
+
causal_attention_mask,
|
| 342 |
+
head_mask[idx] if head_mask is not None else None,
|
| 343 |
+
None,
|
| 344 |
+
output_attentions,
|
| 345 |
+
use_cache,
|
| 346 |
+
)
|
| 347 |
+
else:
|
| 348 |
+
layer_outputs = decoder_layer(
|
| 349 |
+
hidden_states,
|
| 350 |
+
attention_mask=causal_attention_mask,
|
| 351 |
+
layer_head_mask=(head_mask[idx] if head_mask is not None else None),
|
| 352 |
+
past_key_value=past_key_value,
|
| 353 |
+
output_attentions=output_attentions,
|
| 354 |
+
use_cache=use_cache,
|
| 355 |
+
)
|
| 356 |
+
|
| 357 |
+
hidden_states = layer_outputs[0]
|
| 358 |
+
|
| 359 |
+
if use_cache:
|
| 360 |
+
next_decoder_cache += (layer_outputs[2 if output_attentions else 1],)
|
| 361 |
+
|
| 362 |
+
if output_attentions:
|
| 363 |
+
all_self_attns += (layer_outputs[1],)
|
| 364 |
+
|
| 365 |
+
if self.final_layer_norm is not None:
|
| 366 |
+
hidden_states = self.final_layer_norm(hidden_states)
|
| 367 |
+
|
| 368 |
+
# add hidden states from the last decoder layer
|
| 369 |
+
if output_hidden_states:
|
| 370 |
+
all_hidden_states += (hidden_states,)
|
| 371 |
+
|
| 372 |
+
next_cache = next_decoder_cache if use_cache else None
|
| 373 |
+
if not return_dict:
|
| 374 |
+
return tuple(v for v in [hidden_states, next_cache, all_hidden_states, all_self_attns] if v is not None)
|
| 375 |
+
return BaseModelOutputWithPast(
|
| 376 |
+
last_hidden_state=hidden_states,
|
| 377 |
+
past_key_values=next_cache,
|
| 378 |
+
hidden_states=all_hidden_states,
|
| 379 |
+
attentions=all_self_attns,
|
| 380 |
+
)
|
| 381 |
+
|
| 382 |
+
class OPTBonePositionalEmbedding(nn.Embedding):
|
| 383 |
+
"""
|
| 384 |
+
This module learns positional embeddings up to a fixed maximum size.
|
| 385 |
+
"""
|
| 386 |
+
|
| 387 |
+
def __init__(self, num_embeddings: int, embedding_dim: int):
|
| 388 |
+
super().__init__(num_embeddings, embedding_dim)
|
| 389 |
+
|
| 390 |
+
def forward(self, attention_mask=None, bone_ids = None, input_ids = None, bone_per_token = None):
|
| 391 |
+
"""`input_ids_shape` is expected to be [bsz x seqlen]."""
|
| 392 |
+
if bone_ids is not None:
|
| 393 |
+
return super().forward(bone_ids)
|
| 394 |
+
|
| 395 |
+
assert input_ids.shape[1] == 1
|
| 396 |
+
idx_in_extra = torch.isin(input_ids, torch.LongTensor([0, 1, 2]).to(input_ids.device))
|
| 397 |
+
cur_ids = input_ids.clone().detach()
|
| 398 |
+
|
| 399 |
+
cur_index = (attention_mask.sum(dim=1, keepdim=True) - 2) % bone_per_token + 3
|
| 400 |
+
cur_ids[~idx_in_extra]=cur_index[~idx_in_extra]
|
| 401 |
+
|
| 402 |
+
return super().forward(cur_ids)
|
| 403 |
+
|
| 404 |
+
AutoConfig.register("shape_opt", ShapeOPTConfig)
|
| 405 |
+
AutoModelForCausalLM.register(ShapeOPTConfig, ShapeOPT)
|
| 406 |
+
|
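The index arithmetic in `OPTBonePositionalEmbedding.forward` above is easy to misread: during incremental decoding, the embedding index of a coordinate token is derived from how many tokens the (conditioning-stripped) attention mask already covers, cycling through indices 3 to bone_per_token + 2, since indices 0-2 are reserved for the bos/eos/pad tokens. A minimal sketch of that cycle, assuming `bone_per_token = 6` as configured in `skeletongen.py` below; this is illustration code, not part of the repo:

import torch

# Reproduce the cur_index formula from OPTBonePositionalEmbedding for a batch of 1.
bone_per_token = 6
for n_tokens in range(2, 16):  # tokens covered by the mask so far (bos + coordinates)
    attention_mask = torch.ones(1, n_tokens)
    cur_index = (attention_mask.sum(dim=1, keepdim=True) - 2) % bone_per_token + 3
    print(n_tokens, int(cur_index))  # prints 3, 4, 5, 6, 7, 8, 3, 4, ...
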
skeleton_models/skeletongen.py
ADDED
@@ -0,0 +1,198 @@
+# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import torch
+from torch import nn
+from transformers import AutoModelForCausalLM
+from third_partys.Michelangelo.encode import load_model
+from skeleton_models.shape_opt import ShapeOPTConfig
+
+def undiscretize(t, low, high, num_discrete):
+    assert (t >= 0).all() and (t <= num_discrete - 1).all()
+    assert high > low
+    t = t.float()
+    t /= num_discrete
+    t = t * (high - low) + low
+    assert (t < high).all() and (t >= low).all()
+    return t
+
+class SkeletonGPT(nn.Module):
+    def __init__(self, args):
+        super().__init__()
+
+        self.args = args
+        self.point_encoder = load_model()
+
+        self.cond_length = 257
+        self.cond_dim = 768
+
+        self.n_discrete_size = args.n_discrete_size
+
+        self.bone_per_token = 6  # 6 tokens per bone (2 joints x 3 coordinates)
+        self.max_length = int(args.n_max_bones * self.bone_per_token + 2 + self.cond_length)
+        self.pad_id = -1
+
+        self.coor_continuous_range = (-0.5, 0.5)
+
+        vocab_size = self.n_discrete_size + 3  # 3 for bos, eos, pad
+        self.config = ShapeOPTConfig.from_pretrained(
+            args.llm,
+            n_positions=self.max_length,
+            max_position_embeddings=self.max_length,
+            vocab_size=vocab_size,
+            _attn_implementation="flash_attention_2"
+        )
+
+        self.bos_token_id = 0
+        self.eos_token_id = 1
+        self.pad_token_id = 2
+
+        self.config.bos_token_id = self.bos_token_id
+        self.config.eos_token_id = self.eos_token_id
+        self.config.pad_token_id = self.pad_token_id
+        self.config._attn_implementation = "flash_attention_2"
+        self.config.n_discrete_size = self.n_discrete_size
+        self.config.bone_per_token = self.bone_per_token
+        self.config.cond_length = self.cond_length
+
+        self.config.word_embed_proj_dim = self.config.hidden_size  # 1024
+
+        self.transformer = AutoModelForCausalLM.from_config(
+            config=self.config, attn_implementation="flash_attention_2")
+
+        self.cond_head_proj = nn.Linear(self.cond_dim, self.config.word_embed_proj_dim)
+        self.cond_proj = nn.Linear(self.cond_dim, self.config.word_embed_proj_dim)
+
+        self.eval()
+
+    def detokenize(self, input_ids):
+        # input_ids: torch.Tensor of shape (batch_size, seq_length)
+        batch_size = input_ids.size(0)
+
+        continuous_coors_list = []
+        num_bones_list = []
+
+        for i in range(batch_size):
+            cur_ids = input_ids[i]  # Shape: (seq_length,)
+
+            # Remove padding tokens
+            cur_ids = cur_ids[cur_ids != self.pad_id]  # Shape: (effective_seq_length,)
+
+            # Check that the length is a multiple of 6 (2 joints * 3 coordinates)
+            if cur_ids.numel() % 6 != 0:
+                return None
+                # raise ValueError(f"Invalid length of input_ids in sample {i}. It should be a multiple of 6.")
+
+            num_bones = cur_ids.numel() // 6
+            num_bones_list.append(num_bones)
+
+            # Reshape into (num_bones, 6)
+            bone_coords = cur_ids.view(num_bones, 6)  # Shape: (num_bones, 6)
+
+            # Undiscretize the coordinates
+            bones_coors = torch.zeros((num_bones, 2, 3), dtype=torch.float16, device=cur_ids.device)
+
+            for j in range(num_bones):
+                bone_coord = bone_coords[j]  # Shape: (6,)
+
+                # Split into two joints
+                joint1_ids = bone_coord[:3]
+                joint2_ids = bone_coord[3:]
+
+                # Undiscretize joint coordinates
+                joint1_coords = undiscretize(joint1_ids, self.coor_continuous_range[0], self.coor_continuous_range[1], self.n_discrete_size)
+                joint2_coords = undiscretize(joint2_ids, self.coor_continuous_range[0], self.coor_continuous_range[1], self.n_discrete_size)
+
+                # Assign to bones_coors
+                bones_coors[j, 0, :] = joint1_coords
+                bones_coors[j, 1, :] = joint2_coords
+
+            continuous_coors_list.append(bones_coors)
+
+        max_num_bones = max(num_bones_list)
+
+        # Initialize the continuous_coors tensor with NaNs
+        continuous_coors = torch.full(
+            (batch_size, max_num_bones, 2, 3),
+            float('nan'),
+            dtype=torch.float16,
+            device=input_ids.device
+        )
+
+        # Place the bones_coors into continuous_coors
+        for i in range(batch_size):
+            num_bones = num_bones_list[i]
+            continuous_coors[i, :num_bones, :, :] = continuous_coors_list[i]
+
+        return continuous_coors  # Shape: (batch_size, max_num_bones, 2, 3)
+
+    # def forward(self, data_dict: dict, is_eval: bool = False) -> dict:
+    #     return self.generate(data_dict)
+
+    def process_point_feature(self, point_feature):
+        encode_feature = torch.zeros(self.args.batchsize_per_gpu, self.cond_length, self.config.word_embed_proj_dim,
+                                     device=self.cond_head_proj.weight.device, dtype=self.cond_head_proj.weight.dtype)
+        encode_feature[:, 0] = self.cond_head_proj(point_feature[:, 0])
+        shape_latents = self.point_encoder.to_shape_latents(point_feature[:, 1:])
+        encode_feature[:, 1:] = self.cond_proj(shape_latents)
+
+        return encode_feature
+
+    @torch.no_grad()
+    def generate(self, data_dict) -> dict:
+        point_feature = self.point_encoder.encode_latents(data_dict["pc_normal"])
+        processed_point_feature = self.process_point_feature(point_feature=point_feature)
+        generate_length = self.max_length - self.cond_length
+        net_device = next(self.parameters()).device
+        outputs = torch.ones(self.args.batchsize_per_gpu, generate_length).long().to(net_device) * self.eos_token_id
+        # batch x ntokens
+        if self.args.num_beams is not None and "pc_normal" in data_dict:
+            results = self.transformer.generate(
+                inputs_embeds=processed_point_feature,
+                max_new_tokens=generate_length,  # all bone tokens plus two (bos/eos)
+                num_beams=self.args.num_beams,
+                bos_token_id=self.bos_token_id,
+                eos_token_id=self.eos_token_id,
+                pad_token_id=self.pad_token_id,
+            )
+        else:
+            results = self.transformer.generate(
+                inputs_embeds=processed_point_feature,
+                max_new_tokens=generate_length,  # all bone tokens plus two (bos/eos)
+                do_sample=True,
+                top_k=50,
+                top_p=0.95,
+                bos_token_id=self.bos_token_id,
+                eos_token_id=self.eos_token_id,
+                pad_token_id=self.pad_token_id,
+            )
+        assert results.shape[1] <= generate_length  # B x ntokens; bos is not included since it's predicted
+        outputs[:, :results.shape[1]] = results
+        # batch x ntokens ====> batch x ntokens x D
+        outputs = outputs[:, 1:-1]  # bos and eos removed
+
+        outputs[outputs == self.bos_token_id] = self.pad_id
+        outputs[outputs == self.eos_token_id] = self.pad_id
+        outputs[outputs == self.pad_token_id] = self.pad_id
+
+        outputs[outputs != self.pad_id] -= 3
+
+        gen_joints = self.detokenize(outputs)
+
+        return gen_joints
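`undiscretize` maps integer coordinate tokens back into `coor_continuous_range`; its inverse, the quantizer applied when skeletons are tokenized, is not part of this file. A minimal round-trip sketch, assuming 128 bins over (-0.5, 0.5); the `discretize` helper here is hypothetical, written only to mirror `undiscretize`:

import torch

def discretize(t, low=-0.5, high=0.5, num_discrete=128):
    # Hypothetical inverse of undiscretize: continuous coords in [low, high) -> integer bins.
    t = (t - low) / (high - low)  # map to [0, 1)
    return (t * num_discrete).long().clamp(0, num_discrete - 1)

coords = torch.tensor([-0.5, -0.1, 0.0, 0.25, 0.499])
tokens = discretize(coords)
recovered = tokens.float() / 128 * (0.5 - (-0.5)) + (-0.5)  # same formula as undiscretize
print(tokens.tolist())     # [0, 51, 64, 96, 127]
print(recovered.tolist())  # bin lower edges; the round-trip error is at most 1/128
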
utils/eval_utils.py
ADDED
@@ -0,0 +1,57 @@
+# Modified from https://github.com/zhan-xu/RigNet
+
+import numpy as np
+
+##### for quantitative calculation
+def chamfer_dist(pt1, pt2):
+    pt1 = pt1[np.newaxis, :, :]
+    pt2 = pt2[:, np.newaxis, :]
+    dist = np.sqrt(np.sum((pt1 - pt2) ** 2, axis=2))
+    min_left = np.mean(np.min(dist, axis=0))
+    min_right = np.mean(np.min(dist, axis=1))
+    return (min_left + min_right) / 2
+
+def oneway_chamfer(pt_src, pt_dst):
+    pt1 = pt_src[np.newaxis, :, :]
+    pt2 = pt_dst[:, np.newaxis, :]
+    dist = np.sqrt(np.sum((pt1 - pt2) ** 2, axis=2))
+    avg_dist = np.mean(np.min(dist, axis=0))
+    return avg_dist
+
+def joint2bone_chamfer_dist(joints1, bones1, joints2, bones2):
+    bone_sample_1 = sample_skel(joints1, bones1)
+    bone_sample_2 = sample_skel(joints2, bones2)
+    dist1 = oneway_chamfer(joints1, bone_sample_2)
+    dist2 = oneway_chamfer(joints2, bone_sample_1)
+    return (dist1 + dist2) / 2
+
+def bone2bone_chamfer_dist(joints1, bones1, joints2, bones2):
+    bone_sample_1 = sample_skel(joints1, bones1)
+    bone_sample_2 = sample_skel(joints2, bones2)
+    return chamfer_dist(bone_sample_1, bone_sample_2)
+
+def sample_bone(p_pos, ch_pos):
+    ray = ch_pos - p_pos
+
+    bone_length = np.linalg.norm(p_pos - ch_pos)
+    num_step = np.round(bone_length / 0.005).astype(int)
+    i_step = np.arange(0, num_step + 1)
+    unit_step = ray / (num_step + 1e-30)
+    unit_step = np.repeat(unit_step[np.newaxis, :], num_step + 1, axis=0)
+    res = p_pos + unit_step * i_step[:, np.newaxis]
+    return res
+
+def sample_skel(joints, bones):
+    bone_sample = []
+    for parent_idx, child_idx in bones:
+        p_pos = joints[parent_idx]
+        ch_pos = joints[child_idx]
+        res = sample_bone(p_pos, ch_pos)
+        bone_sample.append(res)
+
+    if bone_sample:
+        bone_sample = np.concatenate(bone_sample, axis=0)
+    else:
+        bone_sample = np.empty((0, 3))
+
+    return bone_sample
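For reference, a small usage sketch of the metrics above on two toy skeletons (CD-J2J and CD-J2B in the paper's terminology; the coordinates are made up for illustration):

import numpy as np
from utils.eval_utils import chamfer_dist, joint2bone_chamfer_dist

# joints: (J, 3) arrays; bones: (B, 2) parent/child index pairs
joints_a = np.array([[0.0, 0.0, 0.0], [0.0, 0.1, 0.0], [0.0, 0.2, 0.0]])
bones_a = np.array([[0, 1], [1, 2]])
joints_b = joints_a + np.array([0.01, 0.0, 0.0])  # the same skeleton shifted along x

print("CD-J2J:", chamfer_dist(joints_a, joints_b))
print("CD-J2B:", joint2bone_chamfer_dist(joints_a, bones_a, joints_b, bones_a))
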
utils/mesh_to_pc.py
ADDED
@@ -0,0 +1,84 @@
+# Modified from https://github.com/buaacyw/MeshAnything
+import mesh2sdf.core
+import numpy as np
+import skimage.measure
+import trimesh
+import time
+from typing import List, Tuple
+
+class MeshProcessor:
+    """A class to handle mesh normalization, watertight conversion and point cloud sampling."""
+
+    @staticmethod
+    def normalize_mesh_vertices(vertices: np.ndarray, scaling_factor: float = 0.95) -> Tuple[np.ndarray, np.ndarray, float]:
+        """
+        Normalize mesh vertices to be centered at the origin and scaled appropriately.
+        """
+        min_bounds = vertices.min(axis=0)
+        max_bounds = vertices.max(axis=0)
+
+        center = (min_bounds + max_bounds) * 0.5
+        max_dimension = (max_bounds - min_bounds).max()
+        scale = 2.0 * scaling_factor / max_dimension
+
+        normalized_vertices = (vertices - center) * scale
+        return normalized_vertices, center, scale
+
+    @staticmethod
+    def convert_to_watertight(mesh: trimesh.Trimesh, octree_depth: int = 7) -> trimesh.Trimesh:
+        """
+        Convert to a watertight mesh using mesh2sdf and marching cubes.
+        """
+        grid_size = 2 ** octree_depth
+        iso_level = 2 / grid_size
+
+        # Normalize vertices for SDF computation
+        normalized_vertices, original_center, original_scale = MeshProcessor.normalize_mesh_vertices(mesh.vertices)
+
+        # Compute signed distance field
+        sdf = mesh2sdf.core.compute(normalized_vertices, mesh.faces, size=grid_size)
+
+        # Run marching cubes algorithm
+        vertices, faces, normals, _ = skimage.measure.marching_cubes(np.abs(sdf), iso_level)
+
+        # Transform vertices back to the original coordinate system
+        vertices = vertices / grid_size * 2 - 1  # Map to [-1, 1] range
+        vertices = vertices / original_scale + original_center
+
+        # Create new watertight mesh
+        watertight_mesh = trimesh.Trimesh(vertices, faces, normals=normals)
+        return watertight_mesh
+
+    @staticmethod
+    def convert_meshes_to_point_clouds(
+        meshes: List[trimesh.Trimesh],
+        points_per_mesh: int = 8192,
+        apply_marching_cubes: bool = False,
+        octree_depth: int = 7
+    ) -> List[np.ndarray]:
+        """
+        Process a list of meshes into point clouds with normals.
+        """
+        point_clouds_with_normals = []
+        processed_meshes = []
+
+        for mesh in meshes:
+            # Optionally convert to a watertight mesh first
+            if apply_marching_cubes:
+                start_time = time.time()
+                mesh = MeshProcessor.convert_to_watertight(mesh, octree_depth=octree_depth)
+                processing_time = time.time() - start_time
+                print(f"Marching cubes complete! Time: {processing_time:.2f}s")
+
+            # Store processed mesh
+            processed_meshes.append(mesh)
+
+            # Sample points and get corresponding face normals
+            points, face_indices = mesh.sample(points_per_mesh, return_index=True)
+            point_normals = mesh.face_normals[face_indices]
+
+            # Combine points and normals
+            points_with_normals = np.concatenate([points, point_normals], axis=-1, dtype=np.float16)
+            point_clouds_with_normals.append(points_with_normals)
+
+        return point_clouds_with_normals
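A short usage sketch of `MeshProcessor` (the box primitive stands in for a loaded mesh; setting `apply_marching_cubes=True` would run the watertight conversion above first):

import trimesh
from utils.mesh_to_pc import MeshProcessor

mesh = trimesh.creation.box(extents=(1.0, 1.0, 1.0))  # stand-in for a loaded mesh
clouds = MeshProcessor.convert_meshes_to_point_clouds(
    [mesh], points_per_mesh=8192, apply_marching_cubes=False
)
pc = clouds[0]
print(pc.shape, pc.dtype)  # (8192, 6) float16: xyz plus the sampled face normal
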
utils/save_utils.py
ADDED
@@ -0,0 +1,578 @@
+# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+import numpy as np
+import cv2
+import json
+import trimesh
+
+from collections import deque, defaultdict
+from scipy.cluster.hierarchy import linkage, fcluster
+from scipy.spatial.distance import cdist
+
+from data_utils.pyrender_wrapper import PyRenderWrapper
+from data_utils.data_loader import DataLoader
+
+def save_mesh(vertices, faces, filename):
+    mesh = trimesh.Trimesh(vertices=vertices, faces=faces)
+    mesh.export(filename, file_type='obj')
+
+def pred_joints_and_bones(bone_coor):
+    """
+    Get joints (j, 3) and bones (b, 2) from (b, 2, 3), preserving the parent-child relationship.
+    """
+    parent_coords = bone_coor[:, 0, :]  # (b, 3)
+    child_coords = bone_coor[:, 1, :]  # (b, 3)
+
+    all_coords = np.vstack([parent_coords, child_coords])  # (2b, 3)
+    pred_joints, indices = np.unique(all_coords, axis=0, return_inverse=True)
+
+    b = bone_coor.shape[0]
+    parent_indices = indices[:b]
+    child_indices = indices[b:]
+
+    pred_bones = np.column_stack([parent_indices, child_indices])
+
+    valid_bones = pred_bones[parent_indices != child_indices]
+
+    return pred_joints, valid_bones
+
+def find_connected_components(joints, bones):
+    """Find connected components in the skeleton graph."""
+    n_joints = len(joints)
+    graph = defaultdict(list)
+
+    # Build adjacency list
+    for parent, child in bones:
+        graph[parent].append(child)
+        graph[child].append(parent)
+
+    visited = [False] * n_joints
+    components = []
+
+    for i in range(n_joints):
+        if not visited[i]:
+            component = []
+            queue = deque([i])
+            visited[i] = True
+
+            while queue:
+                node = queue.popleft()
+                component.append(node)
+
+                for neighbor in graph[node]:
+                    if not visited[neighbor]:
+                        visited[neighbor] = True
+                        queue.append(neighbor)
+
+            components.append(component)
+
+    return components
+
+def ensure_skeleton_connectivity(joints, bones, root_index=None, merge_distance_threshold=0.01):
+    """
+    Ensure the skeleton is fully connected.
+    - If distance < merge_distance_threshold: merge the joints
+    - If distance >= merge_distance_threshold: connect them with a bone
+    """
+    current_joints = joints.copy()
+    current_bones = list(bones)
+    current_root = root_index
+
+    iteration = 0
+    while True:
+        components = find_connected_components(current_joints, current_bones)
+        if len(components) == 1:
+            # print("Successfully ensured skeleton connectivity")
+            break
+
+        # Find the globally closest pair of components
+        min_distance = float('inf')
+        best_pair = None
+
+        for i in range(len(components)):
+            for j in range(i + 1, len(components)):
+                comp1_joints = current_joints[components[i]]
+                comp2_joints = current_joints[components[j]]
+
+                distances = cdist(comp1_joints, comp2_joints)
+                min_idx = np.unravel_index(np.argmin(distances), distances.shape)
+                distance = distances[min_idx]
+
+                if distance < min_distance:
+                    min_distance = distance
+                    best_pair = (i, j, components[i][min_idx[0]], components[j][min_idx[1]], min_idx)
+
+        if best_pair is None:
+            print("Warning: Could not find a valid component pair to connect")
+            break
+
+        comp1_idx, comp2_idx, joint1_idx, joint2_idx, min_idx = best_pair
+
+        if min_distance < merge_distance_threshold:
+            # Merge the joints
+            # print(f"Iteration {iteration + 1}: Merging closest joints {joint1_idx} and {joint2_idx} "
+            #       f"(distance: {min_distance:.4f})")
+
+            # Always merge joint2 into joint1
+            merge_map = {joint2_idx: joint1_idx}
+
+            # Update bones
+            updated_bones = []
+            for parent, child in current_bones:
+                new_parent = merge_map.get(parent, parent)
+                new_child = merge_map.get(child, child)
+                if new_parent != new_child:  # Remove self-loops
+                    updated_bones.append([new_parent, new_child])
+
+            # Update root
+            if current_root == joint2_idx:
+                current_root = joint1_idx
+
+            # Remove the merged joint and update indices
+            joint_to_remove = joint2_idx
+            mask = np.ones(len(current_joints), dtype=bool)
+            mask[joint_to_remove] = False
+            current_joints = current_joints[mask]
+
+            # Create index mapping for remaining joints
+            old_to_new = {}
+            new_idx = 0
+            for old_idx in range(len(mask)):
+                if mask[old_idx]:
+                    old_to_new[old_idx] = new_idx
+                    new_idx += 1
+
+            # Update bone indices
+            current_bones = [[old_to_new[parent], old_to_new[child]]
+                             for parent, child in updated_bones
+                             if parent in old_to_new and child in old_to_new]
+
+            # Update root index
+            if current_root is not None and current_root in old_to_new:
+                current_root = old_to_new[current_root]
+
+        else:
+            # Connect with a bone
+            # print(f"Iteration {iteration + 1}: Connecting closest components with bone {joint1_idx} -> {joint2_idx} "
+            #       f"(distance: {min_distance:.4f})")
+            current_bones.append([joint1_idx, joint2_idx])
+
+        iteration += 1
+
+        # prevent infinite loops
+        if iteration > len(joints):
+            print(f"Warning: Maximum iterations reached ({iteration}), stopping")
+            break
+
+    current_bones = np.array(current_bones) if len(current_bones) > 0 else np.array([]).reshape(0, 2)
+
+    # Final connectivity verification
+    final_components = find_connected_components(current_joints, current_bones)
+    if len(final_components) == 1:
+        pass
+    else:
+        print(f"Warning: Still have {len(final_components)} disconnected components after {iteration} iterations")
+
+    return current_joints, current_bones, current_root
+
+def merge_duplicate_joints_and_fix_bones(joints, bones, tolerance=0.0025, root_index=None):
+    """
+    Merge duplicate joints that are within a given tolerance distance, and fix bones to maintain connectivity.
+    Also merge bones that become duplicates after joint merging.
+    """
+    n_joints = len(joints)
+
+    # find joint groups to merge
+    merge_groups = []
+    used = [False] * n_joints
+
+    for i in range(n_joints):
+        if used[i]:
+            continue
+
+        # find all joints within tolerance distance of joint i
+        group = [i]
+        for j in range(i + 1, n_joints):
+            if not used[j]:
+                dist = np.linalg.norm(joints[i] - joints[j])
+                if dist < tolerance:
+                    group.append(j)
+                    used[j] = True
+
+        used[i] = True
+        merge_groups.append(group)
+
+        # if len(group) > 1:
+        #     print(f"found duplicate joints group: {group}")
+
+    # build merge map: choose a representative joint per group
+    merge_map = {}
+    for group in merge_groups:
+        if root_index is not None and root_index in group:
+            representative = root_index
+        else:
+            representative = group[0]  # else choose the first one as representative
+        for joint_idx in group:
+            merge_map[joint_idx] = representative
+
+    # track root joint change
+    intermediate_root_index = None
+    if root_index is not None:
+        intermediate_root_index = merge_map.get(root_index, root_index)
+        # if intermediate_root_index != root_index:
+        #     print(f"root joint index changed from {root_index} to {intermediate_root_index}")
+
+    # update bones: remove self-loop bones, and merge duplicate bones
+    updated_bones = []
+
+    for parent, child in bones:
+        new_parent = merge_map.get(parent, parent)
+        new_child = merge_map.get(child, child)
+
+        if new_parent != new_child:  # remove self-loop bones
+            updated_bones.append([new_parent, new_child])
+
+    # remove duplicate bones
+    unique_bones = []
+    seen_bones = set()
+
+    for bone in updated_bones:
+        bone_key = tuple(bone)  # keep the order of [parent, child]
+        if bone_key not in seen_bones:
+            seen_bones.add(bone_key)
+            unique_bones.append(bone)
+
+    # re-index joints to remove unused joints
+    used_joint_indices = set()
+    for parent, child in unique_bones:
+        used_joint_indices.add(parent)
+        used_joint_indices.add(child)
+    if intermediate_root_index is not None:
+        used_joint_indices.add(intermediate_root_index)
+
+    used_joint_indices = sorted(list(used_joint_indices))
+
+    # new indices for used joints
+    old_to_new = {old_idx: new_idx for new_idx, old_idx in enumerate(used_joint_indices)}
+
+    final_joints = joints[used_joint_indices]
+    final_bones = np.array([[old_to_new[parent], old_to_new[child]]
+                            for parent, child in unique_bones])
+
+    final_root_index = None
+    if intermediate_root_index is not None:
+        final_root_index = old_to_new[intermediate_root_index]
+        if root_index is not None and final_root_index != root_index:
+            print(f"final root index: {root_index} -> {final_root_index}")
+
+    removed_joints = n_joints - len(final_joints)
+    removed_bones = len(bones) - len(final_bones)
+
+    # if removed_joints > 0 or removed_bones > 0:
+    #     print(f"merge results:")
+    #     print(f"  joint number: {n_joints} -> {len(final_joints)} (removed {removed_joints})")
+    #     print(f"  bone number: {len(bones)} -> {len(final_bones)} (removed {removed_bones})")
+
+    # Ensure skeleton connectivity with a relaxed threshold
+    final_joints, final_bones, final_root_index = ensure_skeleton_connectivity(
+        final_joints, final_bones, final_root_index,
+        merge_distance_threshold=tolerance * 8  # more relaxed threshold for connectivity
+    )
+
+    if root_index is not None:
+        return final_joints, final_bones, final_root_index
+    else:
+        return final_joints, final_bones
+
+def save_skeleton_to_txt(pred_joints, pred_bones, pred_root_index, hier_order, vertices, filename='skeleton.txt'):
+    """
+    Save the skeleton to a txt file; the format follows RigNet (joints, root, hier).
+
+    If hier_order: `pred_root_index` gives the root joint, and the parent-child relationship is taken directly
+    from the ordered bones. Otherwise: the joint nearest to the mesh center is set as the root joint, and the
+    hierarchy is built by BFS starting from the root.
+    """
+    num_joints = pred_joints.shape[0]
+
+    # assign joint names
+    joint_names = [f'joint{i}' for i in range(num_joints)]
+
+    adjacency = defaultdict(list)
+    for bone in pred_bones:
+        idx_a, idx_b = bone
+        adjacency[idx_a].append(idx_b)
+        adjacency[idx_b].append(idx_a)
+
+    # find the root joint
+    if hier_order:
+        root_idx = pred_root_index
+    else:
+        centroid = np.mean(vertices, axis=0)
+        distances = np.linalg.norm(pred_joints - centroid, axis=1)
+        root_idx = np.argmin(distances)
+
+    root_name = joint_names[root_idx]
+
+    # build hierarchy
+    parent_map = {}
+
+    if hier_order:
+        visited = set()
+
+        for parent_idx, child_idx in pred_bones:
+            if child_idx not in parent_map:
+                parent_map[child_idx] = parent_idx
+                visited.add(child_idx)
+                visited.add(parent_idx)
+
+        parent_map[root_idx] = None
+
+    else:
+        visited = set([root_idx])
+        queue = deque([root_idx])
+        parent_map[root_idx] = None
+
+        while queue:
+            current_idx = queue.popleft()
+            for neighbor_idx in adjacency[current_idx]:
+                if neighbor_idx not in visited:
+                    parent_map[neighbor_idx] = current_idx
+                    visited.add(neighbor_idx)
+                    queue.append(neighbor_idx)
+
+    if len(visited) != num_joints:
+        print(f"bones are not fully connected, leaving {num_joints - len(visited)} joints unconnected.")
+
+    # save joints
+    joints_lines = []
+    for idx, coord in enumerate(pred_joints):
+        name = joint_names[idx]
+        joints_line = f'joints {name} {coord[0]:.8f} {coord[1]:.8f} {coord[2]:.8f}'
+        joints_lines.append(joints_line)
+
+    # save root name
+    root_line = f'root {root_name}'
+
+    # save hierarchy
+    hier_lines = []
+    for child_idx, parent_idx in parent_map.items():
+        if parent_idx is not None:
+            parent_name = joint_names[parent_idx]
+            child_name = joint_names[child_idx]
+            hier_line = f'hier {parent_name} {child_name}'
+            hier_lines.append(hier_line)
+
+    with open(filename, 'w') as file:
+        for line in joints_lines:
+            file.write(line + '\n')
+
+        file.write(root_line + '\n')
+
+        for line in hier_lines:
+            file.write(line + '\n')
+
+def save_skeleton_obj(joints, bones, save_path, root_index=None, radius_sphere=0.01,
+                      radius_bone=0.005, segments=16, stacks=16, use_cone=False):
+    """
+    Save skeletons to an obj file; each connection consists of two red spheres (joints) and one blue cylinder (bone).
+    If the root index is known, the root sphere is colored green.
+    """
+    all_vertices = []
+    all_colors = []
+    all_faces = []
+    vertex_offset = 0
+
+    # create spheres for joints
+    for i, joint in enumerate(joints):
+        # define color
+        if root_index is not None and i == root_index:
+            color = (0, 1, 0)  # green for the root joint
+        else:
+            color = (1, 0, 0)  # red for other joints
+
+        # create joint sphere
+        sphere_vertices, sphere_faces = create_sphere(joint, radius=radius_sphere, segments=segments, stacks=stacks)
+        all_vertices.extend(sphere_vertices)
+        all_colors.extend([color] * len(sphere_vertices))
+
+        # adjust face indices
+        adjusted_sphere_faces = [(v1 + vertex_offset, v2 + vertex_offset, v3 + vertex_offset) for (v1, v2, v3) in sphere_faces]
+        all_faces.extend(adjusted_sphere_faces)
+        vertex_offset += len(sphere_vertices)
+
+    # create bones
+    for bone in bones:
+        parent_idx, child_idx = bone
+        parent = joints[parent_idx]
+        child = joints[child_idx]
+
+        try:
+            bone_vertices, bone_faces = create_bone(parent, child, radius=radius_bone, segments=segments, use_cone=use_cone)
+        except ValueError as e:
+            print(f"Skipping connection {parent_idx}-{child_idx}, reason: {e}")
+            continue
+
+        all_vertices.extend(bone_vertices)
+        all_colors.extend([(0, 0, 1)] * len(bone_vertices))  # blue
+
+        # adjust face indices
+        adjusted_bone_faces = [(v1 + vertex_offset, v2 + vertex_offset, v3 + vertex_offset) for (v1, v2, v3) in bone_faces]
+        all_faces.extend(adjusted_bone_faces)
+        vertex_offset += len(bone_vertices)
+
+    # save to obj
+    obj_lines = []
+    for v, c in zip(all_vertices, all_colors):
+        obj_lines.append(f"v {v[0]} {v[1]} {v[2]} {c[0]} {c[1]} {c[2]}")
+    obj_lines.append("")
+
+    for face in all_faces:
+        obj_lines.append(f"f {face[0]} {face[1]} {face[2]}")
+
+    with open(save_path, 'w') as obj_file:
+        obj_file.write("\n".join(obj_lines))
+
+def create_sphere(center, radius=0.01, segments=16, stacks=16):
+    vertices = []
+    faces = []
+    for i in range(stacks + 1):
+        lat = np.pi / 2 - i * np.pi / stacks
+        xy = radius * np.cos(lat)
+        z = radius * np.sin(lat)
+        for j in range(segments):
+            lon = j * 2 * np.pi / segments
+            x = xy * np.cos(lon) + center[0]
+            y = xy * np.sin(lon) + center[1]
+            vertices.append((x, y, z + center[2]))
+    for i in range(stacks):
+        for j in range(segments):
+            first = i * segments + j
+            second = first + segments
+            third = first + 1 if (j + 1) < segments else i * segments
+            fourth = second + 1 if (j + 1) < segments else (i + 1) * segments
+            faces.append((first + 1, second + 1, fourth + 1))
+            faces.append((first + 1, fourth + 1, third + 1))
+    return vertices, faces
+
+def create_bone(start, end, radius=0.005, segments=16, use_cone=False):
+    dir_vector = np.array(end) - np.array(start)
+    height = np.linalg.norm(dir_vector)
+    if height == 0:
+        raise ValueError("Start and end points cannot be the same for a cone.")
+    dir_vector = dir_vector / height
+
+    z = np.array([0, 0, 1])
+    if np.allclose(dir_vector, z):
+        R = np.identity(3)
+    elif np.allclose(dir_vector, -z):
+        R = np.array([[-1, 0, 0], [0, -1, 0], [0, 0, 1]])
+    else:
+        v = np.cross(z, dir_vector)
+        s = np.linalg.norm(v)
+        c = np.dot(z, dir_vector)
+        kmat = np.array([[0, -v[2], v[1]],
+                         [v[2], 0, -v[0]],
+                         [-v[1], v[0], 0]])
+        R = np.identity(3) + kmat + np.matmul(kmat, kmat) * ((1 - c) / (s**2))
+
+    theta = np.linspace(0, 2 * np.pi, segments, endpoint=False)
+    base_circle = np.array([np.cos(theta), np.sin(theta), np.zeros(segments)]) * radius
+
+    vertices = []
+    for point in base_circle.T:
+        rotated = np.dot(R, point) + np.array(start)
+        vertices.append(tuple(rotated))
+
+    faces = []
+
+    if use_cone:
+        vertices.append(tuple(end))
+
+        apex_idx = segments + 1
+        for i in range(segments):
+            next_i = (i + 1) % segments
+            faces.append((i + 1, next_i + 1, apex_idx))
+    else:
+        top_circle = np.array([np.cos(theta), np.sin(theta), np.ones(segments)]) * radius
+        for point in top_circle.T:
+            point_scaled = np.array([point[0], point[1], height])
+            rotated = np.dot(R, point_scaled) + np.array(start)
+            vertices.append(tuple(rotated))
+        for i in range(segments):
+            next_i = (i + 1) % segments
+            faces.append((i + 1, next_i + 1, next_i + segments + 1))
+            faces.append((i + 1, next_i + segments + 1, i + segments + 1))
+
+    return vertices, faces
+
+def render_mesh_with_skeleton(joints, bones, vertices, faces, output_dir, filename, prefix='pred', root_idx=None):
+    """
+    Render the mesh with its skeleton using PyRender.
+    """
+    loader = DataLoader()
+
+    raw_size = (960, 960)
+    renderer = PyRenderWrapper(raw_size)
+
+    save_dir = os.path.join(output_dir, 'render_results')
+    os.makedirs(save_dir, exist_ok=True)
+
+    loader.joints = joints
+    loader.bones = bones
+    loader.root_idx = root_idx
+
+    mesh = trimesh.Trimesh(vertices=vertices, faces=faces)
+    mesh.visual.vertex_colors[:, 3] = 100  # set transparency
+    loader.mesh = mesh
+    v = mesh.vertices
+    xmin, ymin, zmin = v.min(axis=0)
+    xmax, ymax, zmax = v.max(axis=0)
+    loader.bbox_center = np.array([(xmax + xmin) / 2, (ymax + ymin) / 2, (zmax + zmin) / 2])
+    loader.bbox_size = np.array([xmax - xmin, ymax - ymin, zmax - zmin])
+    loader.bbox_scale = max(xmax - xmin, ymax - ymin, zmax - zmin)
+    loader.normalize_coordinates()
+
+    input_dict = loader.query_mesh_rig()
+
+    angles = [0, np.pi / 2, np.pi, 3 * np.pi / 2]
+    distance = np.max(loader.bbox_size) * 2
+
+    subfolder_path = os.path.join(save_dir, filename + '_' + prefix)
+
+    os.makedirs(subfolder_path, exist_ok=True)
+
+    for i, angle in enumerate(angles):
+        renderer.set_camera_view(angle, loader.bbox_center, distance)
+        renderer.align_light_to_camera()
+
+        color = renderer.render(input_dict)[0]
+
+        output_filename = f"{filename}_{prefix}_view{i+1}.png"
+        output_filepath = os.path.join(subfolder_path, output_filename)
+        cv2.imwrite(output_filepath, color)
+
+def save_args(args, output_dir, filename="config.json"):
+    args_dict = vars(args)
+    os.makedirs(output_dir, exist_ok=True)
+    config_path = os.path.join(output_dir, filename)
+    with open(config_path, 'w') as f:
+        json.dump(args_dict, f, indent=4)
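A small sketch of the post-processing entry point above: turning a predicted `(B, 2, 3)` bone-coordinate array into indexed joints and bones, then merging near-duplicate joints (the coordinates are illustrative):

import numpy as np
from utils.save_utils import pred_joints_and_bones, merge_duplicate_joints_and_fix_bones

# Two bones that share a joint; the model emits each bone as a (parent, child) coordinate pair.
bone_coor = np.array([
    [[0.0, 0.0, 0.0], [0.0, 0.1, 0.0]],
    [[0.0, 0.1, 0.0], [0.1, 0.1, 0.0]],
])
joints, bones = pred_joints_and_bones(bone_coor)  # 3 unique joints, 2 index pairs
joints, bones = merge_duplicate_joints_and_fix_bones(joints, bones, tolerance=0.0025)
print(joints.shape, bones.tolist())
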
utils/skeleton_data_loader.py
ADDED
@@ -0,0 +1,97 @@
+# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import torch
+from torch import is_tensor
+from torch.utils.data import Dataset
+from torch.nn.utils.rnn import pad_sequence
+from data_utils.save_npz import normalize_to_unit_cube
+
+import numpy as np
+
+class SkeletonData(Dataset):
+    """
+    A PyTorch Dataset to load and process skeleton data.
+    """
+    def __init__(self, data, args, is_training):
+        self.data = data
+
+        self.input_pc_num = args.input_pc_num
+        self.is_training = is_training
+
+        self.hier_order = args.hier_order
+        print(f"[Dataset] Created from {len(self.data)} entries")
+
+    def __len__(self):
+        return len(self.data)
+
+    def __getitem__(self, idx):
+        data = self.data[idx]
+
+        joints = data['joints']
+        vertices = data['vertices']
+        pc_normal = data['pc_w_norm']
+
+        indices = np.random.choice(pc_normal.shape[0], self.input_pc_num, replace=False)
+        pc_normal = pc_normal[indices, :]
+
+        pc_coor = pc_normal[:, :3]
+        normal = pc_normal[:, 3:]
+        if np.linalg.norm(normal, axis=1, keepdims=True).min() < 0.99:
+            print("normal reroll")
+            return self.__getitem__(np.random.randint(0, len(self.data)))
+
+        data_dict = {}
+
+        # normalize normals to unit length
+        normal = normal / np.linalg.norm(normal, axis=1, keepdims=True)
+
+        # scale to [-0.5, 0.5]
+        _, center, scale = normalize_to_unit_cube(vertices.copy(), scale_factor=0.9995)
+        joints = (joints - center) * scale  # align joints with the point cloud first
+
+        bounds = np.array([pc_coor.min(axis=0), pc_coor.max(axis=0)])
+        pc_center = (bounds[0] + bounds[1])[None, :] / 2
+        pc_scale = (bounds[1] - bounds[0]).max() + 1e-5
+        pc_coor = (pc_coor - pc_center) / pc_scale
+        joints = (joints - pc_center) / pc_scale
+
+        joints = joints.clip(-0.5, 0.5)
+
+        data_dict['joints'] = torch.from_numpy(np.asarray(joints).astype(np.float16))
+        data_dict['bones'] = torch.from_numpy(data['bones'].astype(np.int64))
+        pc_coor = pc_coor / np.abs(pc_coor).max() * 0.9995
+        data_dict['pc_normal'] = torch.from_numpy(np.concatenate([pc_coor, normal], axis=-1).astype(np.float16))
+        data_dict['vertices'] = torch.from_numpy(data['vertices'].astype(np.float16))
+        data_dict['faces'] = torch.from_numpy(data['faces'].astype(np.int64))
+        data_dict['uuid'] = data['uuid']
+        data_dict['root_index'] = str(data['root_index'])
+        data_dict['transform_params'] = torch.tensor([
+            center[0], center[1], center[2],
+            scale,
+            pc_center[0][0], pc_center[0][1], pc_center[0][2],
+            pc_scale
+        ], dtype=torch.float32)
+
+        return data_dict
+
+    @classmethod
+    def load(cls, args, is_training=True):
+        loaded_data = np.load(args.dataset_path, allow_pickle=True)
+        data = []
+        for item in loaded_data["arr_0"]:
+            data.append(item)
+        print(f"[Dataset] Loaded {len(data)} entries")
+        return cls(data, args, is_training)
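`__getitem__` returns per-sample tensors whose `joints` and `bones` lengths vary between models, so batching needs a custom collate function; the `pad_sequence` import above suggests padding. A minimal sketch, where the `pad_skeleton_collate` name and the padding values are assumptions, not part of this repo:

import torch
from torch.nn.utils.rnn import pad_sequence

def pad_skeleton_collate(batch):
    # Hypothetical collate: pad the ragged tensors, stack the fixed-size ones.
    out = {
        'joints': pad_sequence([b['joints'] for b in batch], batch_first=True, padding_value=0.0),
        'bones': pad_sequence([b['bones'] for b in batch], batch_first=True, padding_value=-1),
        'pc_normal': torch.stack([b['pc_normal'] for b in batch]),  # fixed input_pc_num points
        'transform_params': torch.stack([b['transform_params'] for b in batch]),
        'uuid': [b['uuid'] for b in batch],
    }
    return out

# loader = torch.utils.data.DataLoader(SkeletonData.load(args), batch_size=4, collate_fn=pad_skeleton_collate)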