udbbdh committed on Jan 4

Commit

7340df2

verified ·

1 Parent(s): 799d5e6

Upload folder using huggingface_hub

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +3 -0
40w_2000-100000edge_2000-75000active.txt +3 -0
40w_2000-200000edge_2000-100000active.txt +3 -0
MERGED_DATASET_count_200_2000_10000_train_2000min_100000max.txt +0 -0
MERGED_DATASET_filtered_2000-100000edge_2000-32678active.txt +0 -0
__pycache__/dataset_triposf.cpython-310.pyc +0 -0
__pycache__/dataset_triposf_head.cpython-310.pyc +0 -0
__pycache__/query_point.cpython-310.pyc +0 -0
__pycache__/utils.bresenham_3d_array-192.py310.1.nbc +3 -0
__pycache__/utils.bresenham_3d_array-192.py310.nbi +0 -0
__pycache__/utils.cpython-310.pyc +0 -0
__pycache__/vertex_encoder.cpython-310.pyc +0 -0
config_edge_1024_error_8enc_8dec_woself_finetune_128to1024_addhead.yaml +82 -0
config_edge_1024_error_8enc_8dec_woself_finetune_128to1024_head_woca.yaml +97 -0
config_edge_1024_error_8enc_8dec_woself_finetune_128to512.yaml +101 -0
config_slat_flow_128to256_pointnet_test.yaml +124 -0
dataset_triposf.py +924 -0
dataset_triposf_head.py +1000 -0
debug_viz/step_0_batch_0.ply +0 -0
debug_viz/step_0_batch_1.ply +0 -0
filter_active_voxels.py +106 -0
generate_npz.py +118 -0
mesh_augment.py +79 -0
metric.py +300 -0
metric_cd.py +190 -0
query_point.py +259 -0
test_slat_flow_128to1024_pointnet.py +403 -0
test_slat_flow_128to256_pointnet.py +403 -0
test_slat_vae_128to1024_pointnet.py +0 -0
test_slat_vae_128to1024_pointnet_vae.py +0 -0
test_slat_vae_128to1024_pointnet_vae_addhead.py +0 -0
test_slat_vae_128to1024_pointnet_vae_head.py +1339 -0
test_slat_vae_128to1024_pointnet_vae_head_woca.py +0 -0
test_slat_vae_128to256_pointnet_vae_head.py +1349 -0
test_slat_vae_128to512_pointnet_vae_head.py +1636 -0
train_slat_flow_128to1024_pointnet.py +484 -0
train_slat_vae_512_128to1024_pointnet.py +682 -0
train_slat_vae_512_128to1024_pointnet_addhead.py +788 -0
train_slat_vae_512_128to1024_pointnet_head.py +930 -0
train_slat_vae_512_128to256_pointnet_head.py +917 -0
train_slat_vae_512_128to512_pointnet_head.py +1090 -0
trellis/__init__.py +6 -0
trellis/__pycache__/__init__.cpython-310.pyc +0 -0
trellis/datasets/__init__.py +58 -0
trellis/datasets/__pycache__/__init__.cpython-310.pyc +0 -0
trellis/datasets/__pycache__/components.cpython-310.pyc +0 -0
trellis/datasets/__pycache__/sparse_structure_latent.cpython-310.pyc +0 -0
trellis/datasets/components.py +137 -0
trellis/datasets/sparse_feat2render.py +134 -0
trellis/datasets/sparse_structure.py +107 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+40w_2000-100000edge_2000-75000active.txt filter=lfs diff=lfs merge=lfs -text
+40w_2000-200000edge_2000-100000active.txt filter=lfs diff=lfs merge=lfs -text
+__pycache__/utils.bresenham_3d_array-192.py310.1.nbc filter=lfs diff=lfs merge=lfs -text

40w_2000-100000edge_2000-75000active.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6e283bf011c720c1300f7c4621c15af8360df05f0bcfbd71faf984e2d61f6e17
+size 38231752

40w_2000-200000edge_2000-100000active.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:46aae1f5d9bce421cb402a69b3b112daf586ed8f470c7de4b1e6d73adc82ef56
+size 44315087

MERGED_DATASET_count_200_2000_10000_train_2000min_100000max.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

MERGED_DATASET_filtered_2000-100000edge_2000-32678active.txt ADDED Viewed

File without changes

__pycache__/dataset_triposf.cpython-310.pyc ADDED Viewed

Binary file (24.8 kB). View file

__pycache__/dataset_triposf_head.cpython-310.pyc ADDED Viewed

Binary file (26.1 kB). View file

__pycache__/query_point.cpython-310.pyc ADDED Viewed

Binary file (8.77 kB). View file

__pycache__/utils.bresenham_3d_array-192.py310.1.nbc ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d783005c5692d0ddfb1fc45298776ac767bde1326be014a6e65e76e41aeef898
+size 113601

__pycache__/utils.bresenham_3d_array-192.py310.nbi ADDED Viewed

Binary file (1.59 kB). View file

__pycache__/utils.cpython-310.pyc ADDED Viewed

Binary file (24.5 kB). View file

__pycache__/vertex_encoder.cpython-310.pyc ADDED Viewed

Binary file (19.2 kB). View file

config_edge_1024_error_8enc_8dec_woself_finetune_128to1024_addhead.yaml ADDED Viewed

	@@ -0,0 +1,82 @@

+"dataset":
+  "base_resolution": 1024
+  "cache_dir": "/gemini/user/private/zhaotianhao/dataset_cache/MERGED_DATASET_count_200_2000_100000_128to1024_819200_head"
+  "cache_filter_path": "/gemini/user/private/zhaotianhao/Triposf/MERGED_DATASET_filtered_2000-75000edge_2000-326780active.txt"
+  "filter_active_voxels": true
+  "min_resolution": 128
+  "n_train_samples": 1024
+  "path": "/gemini/user/private/zhaotianhao/data/MERGED_DATASET_count_200_2000_100000/train"
+  "renders_dir": "None"
+  "sample_type": "dora"
+"experiment":
+  "save_dir": "/gemini/user/private/zhaotianhao/checkpoints/vae/train_9w_200_2000face/shapenet_bs1_128to1024_wolabel_dir_sorted_dora_bigger_addhead"
+"model":
+  "add_block_embed": true
+  "add_direction": false
+  "add_edge_glb_feats": true
+  "attn_first": false
+  "block_size": 16
+  "decoder_blocks_edge":
+  - "in_channels": 768
+    "model_channels": 768
+    "num_blocks": 0
+    "num_heads": 0
+    "out_channels": 128
+    "resolution": 128
+  - "in_channels": 128
+    "model_channels": 128
+    "num_blocks": 0
+    "num_heads": 0
+    "out_channels": 64
+    "resolution": 256
+  - "in_channels": 64
+    "model_channels": 64
+    "num_blocks": 0
+    "num_heads": 0
+    "out_channels": 32
+    "resolution": 512
+  "decoder_blocks_vtx":
+  - "in_channels": 768
+    "model_channels": 768
+    "num_blocks": 0
+    "num_heads": 0
+    "out_channels": 128
+    "resolution": 128
+  - "in_channels": 128
+    "model_channels": 128
+    "num_blocks": 0
+    "num_heads": 0
+    "out_channels": 64
+    "resolution": 256
+  - "in_channels": 64
+    "model_channels": 64
+    "num_blocks": 0
+    "num_heads": 0
+    "out_channels": 32
+    "resolution": 512
+  "embed_dim": 1024
+  "encoder_blocks":
+  - "in_channels": 1024
+    "model_channels": 768
+    "num_blocks": 12
+    "num_heads": 12
+    "out_channels": 768
+  "in_channels": 1024
+  "latent_dim": 16
+  "model_channels": 384
+  "multires": 12
+  "pos_encoding": "nerf"
+  "pred_direction": true
+  "relative_embed": true
+  "using_attn": false
+"training":
+  "batch_size": 1
+  "checkpoint_path": "/gemini/user/private/zhaotianhao/checkpoints/vae/train_9w_200_2000face/shapenet_bs1_128to1024_wolabel_dir_sorted_dora_bigger/checkpoint_epoch14_batch10432_loss0.1438.pt"
+  "from_pretrained": true
+  "gamma": 0.95
+  "lr": 0.0001
+  "max_epochs": 100
+  "num_workers": 12
+  "save_every": 1
+  "start_epoch": 1
+  "step_size": 1

config_edge_1024_error_8enc_8dec_woself_finetune_128to1024_head_woca.yaml ADDED Viewed

	@@ -0,0 +1,97 @@

+dataset:
+  path: /gemini/user/private/zhaotianhao/data/MERGED_DATASET_count_200_2000_100000/train
+  cache_dir: /gemini/user/private/zhaotianhao/dataset_cache/MERGED_DATASET_count_200_2000_100000_128to1024_819200_head
+  renders_dir: None
+  filter_active_voxels: true
+  cache_filter_path: /gemini/user/private/zhaotianhao/Triposf/MERGED_DATASET_filtered_2000-75000edge_2000-326780active.txt
+  base_resolution: 1024
+  min_resolution: 128
+  n_train_samples: 1024
+  sample_type: dora
+model:
+  pred_direction: false
+  relative_embed: true
+  using_attn: false
+  add_block_embed: true
+  multires: 12
+  embed_dim: 1024 #64
+  in_channels: 1024 #64
+  model_channels: 384
+  latent_dim: 16
+  block_size: 16
+  pos_encoding: 'nerf'
+  # pos_encoding: 'embedding'
+  attn_first: false
+  add_edge_glb_feats: true
+  add_direction: false
+  encoder_blocks:
+  - in_channels: 1024
+    model_channels: 512
+    num_blocks: 8
+    num_heads: 8
+    out_channels: 512
+  decoder_blocks_edge:
+  - in_channels: 512
+    model_channels: 512
+    num_blocks: 0
+    num_heads: 0
+    out_channels: 256
+    resolution: 128
+  - in_channels: 256
+    model_channels: 256
+    num_blocks: 0
+    num_heads: 0
+    out_channels: 128
+    resolution: 256
+  - in_channels: 128
+    model_channels: 128
+    num_blocks: 0
+    num_heads: 0
+    out_channels: 64
+    resolution: 512
+  decoder_blocks_vtx:
+  - in_channels: 512
+    model_channels: 512
+    num_blocks: 0
+    num_heads: 0
+    out_channels: 256
+    resolution: 128
+  - in_channels: 256
+    model_channels: 256
+    num_blocks: 0
+    num_heads: 0
+    out_channels: 128
+    resolution: 256
+  - in_channels: 128
+    model_channels: 128
+    num_blocks: 0
+    num_heads: 0
+    out_channels: 64
+    resolution: 512
+training:
+  batch_size: 2
+  lr: 1.e-4
+  step_size: 1
+  gamma: 0.95
+  save_every: 5
+  start_epoch: 0
+  max_epochs: 200
+  num_workers: 4
+  from_pretrained: false
+  checkpoint_path: /root/Trisf/experiments_edge/vae/train_9w_200_2000face/9w_128to1024/checkpoint_epoch14_batch5216_loss0.2745.pt
+experiment:
+  save_dir: "/root/Trisf/experiments_edge/vae/{dataset_name}_9w_200_2000face/shapenet_bs{batch_size}_128to1024_wolabel_dir_sorted_dora_small_allasyloss"

config_edge_1024_error_8enc_8dec_woself_finetune_128to512.yaml ADDED Viewed

	@@ -0,0 +1,101 @@

+dataset:
+  # path: /gemini/user/private/zhaotianhao/data/MERGED_DATASET_count_200_2000_100000/train
+  # cache_dir: /gemini/user/private/zhaotianhao/dataset_cache/MERGED_DATASET_count_200_2000_100000_128to1024_819200_head
+  path: /home/tiger/yy/src/unique_files_glb_under6000face_2degree_30ratio_0.01
+  cache_dir: /home/tiger/yy/src/dataset_cache/unique_files_glb_under6000face_2degree_30ratio_0.01
+  renders_dir: None
+  filter_active_voxels: true
+  cache_filter_path: /home/tiger/yy/src/Michelangelo-master/40w_2000-200000edge_2000-100000active.txt # 5epoch
+  # cache_filter_path: /home/tiger/yy/src/Michelangelo-master/40w_2000-100000edge_2000-75000active.txt # 0-4 epoch
+  base_resolution: 512
+  min_resolution: 128
+  n_train_samples: 1024
+  sample_type: dora
+model:
+  pred_direction: false
+  relative_embed: true
+  using_attn: false
+  add_block_embed: true
+  multires: 12
+  embed_dim: 1024 #64
+  in_channels: 1024 #64
+  model_channels: 384
+  latent_dim: 16
+  block_size: 16
+  pos_encoding: 'nerf'
+  attn_first: false
+  add_edge_glb_feats: true
+  add_direction: false
+  encoder_blocks:
+  - in_channels: 1024
+    model_channels: 512
+    num_blocks: 8
+    num_heads: 8
+    out_channels: 512
+  decoder_blocks_edge:
+  - in_channels: 512
+    model_channels: 512
+    num_blocks: 0
+    num_heads: 0
+    out_channels: 256
+    resolution: 128
+  - in_channels: 256
+    model_channels: 256
+    num_blocks: 0
+    num_heads: 0
+    out_channels: 128
+    resolution: 256
+  # - in_channels: 128
+  #   model_channels: 128
+  #   num_blocks: 0
+  #   num_heads: 0
+  #   out_channels: 64
+  #   resolution: 512
+  decoder_blocks_vtx:
+  - in_channels: 512
+    model_channels: 512
+    num_blocks: 0
+    num_heads: 0
+    out_channels: 256
+    resolution: 128
+  - in_channels: 256
+    model_channels: 256
+    num_blocks: 0
+    num_heads: 0
+    out_channels: 128
+    resolution: 256
+  # - in_channels: 128
+  #   model_channels: 128
+  #   num_blocks: 0
+  #   num_heads: 0
+  #   out_channels: 64
+  #   resolution: 512
+training:
+  batch_size: 2
+  lr: 4.e-5
+  step_size: 1
+  gamma: 0.95
+  save_every: 1
+  start_epoch: 0
+  max_epochs: 10
+  num_workers: 32
+  from_pretrained: true
+  # checkpoint_path: /home/tiger/yy/src/checkpoint_epoch2_batch30000_loss0.1829.pt
+  checkpoint_path: /home/tiger/yy/src/checkpoints/vae/unique_files_glb_under6000face_2degree_30ratio_0.01/shapenet_bs2_128to512_wolabel_dir_sorted_dora_small/checkpoint_epoch7_batch10000_loss0.1175.pt
+experiment:
+  save_dir: "/home/tiger/yy/src/checkpoints/vae/{dataset_name}/shapenet_bs{batch_size}_128to512_wolabel_dir_sorted_dora_small_lowlr"

config_slat_flow_128to256_pointnet_test.yaml ADDED Viewed

	@@ -0,0 +1,124 @@

+model:
+  pred_direction: true
+  relative_embed: true
+  using_attn: false
+  add_block_embed: true
+  multires: 12
+  embed_dim: 1024
+  in_channels: 1024
+  model_channels: 384
+  latent_dim: 16
+  block_size: 16
+  pos_encoding: 'nerf'
+  attn_first: false
+  add_edge_glb_feats: true
+  add_direction: false
+  encoder_blocks:
+  - in_channels: 1024
+    model_channels: 512
+    num_blocks: 8
+    num_heads: 8
+    out_channels: 512
+  decoder_blocks_edge:
+  - in_channels: 512
+    model_channels: 512
+    num_blocks: 0
+    num_heads: 0
+    out_channels: 128
+    resolution: 128
+  # - in_channels: 128
+  #   model_channels: 128
+  #   num_blocks: 0
+  #   num_heads: 0
+  #   out_channels: 64
+  #   resolution: 256
+  # - in_channels: 64
+  #   model_channels: 64
+  #   num_blocks: 0
+  #   num_heads: 0
+  #   out_channels: 32
+  #   resolution: 512
+  decoder_blocks_vtx:
+  - in_channels: 512
+    model_channels: 512
+    num_blocks: 0
+    num_heads: 0
+    out_channels: 128
+    resolution: 128
+  # - in_channels: 128
+  #   model_channels: 128
+  #   num_blocks: 0
+  #   num_heads: 0
+  #   out_channels: 64
+  #   resolution: 256
+  # - in_channels: 64
+  #   model_channels: 64
+  #   num_blocks: 0
+  #   num_heads: 0
+  #   out_channels: 32
+  #   resolution: 512
+"t_schedule":
+    "name": "logitNormal"
+    "args":
+        "mean": 1.0
+        "std": 1.0
+"sigma_min": 1.e-5
+training:
+  batch_size: 1
+  lr: 1.e-4
+  step_size: 20
+  gamma: 0.95
+  save_every: 2000
+  start_epoch: 0
+  max_epochs: 300000
+  num_workers: 4
+  output_dir: /gemini/user/private/zhaotianhao/checkpoints/output_slat_flow_matching_active/8w_128to256_head_rope
+  clip_model_path: "/gemini/user/private/zhaotianhao/clip-vit-large-patch14"
+  dinov2_model_path: "/gemini/user/private/zhaotianhao/dinov2-large"
+  vae_path: /gemini/user/private/zhaotianhao/checkpoints/vae/train_9w_200_2000face/shapenet_bs2_128to256_dir_sorted_dora_head_small/checkpoint_epoch13_batch6000_loss0.1381.pt
+  denoiser_checkpoint_path: false
+dataset:
+  # path: /gemini/user/private/zhaotianhao/data/trellis_clean_mesh
+  path: /gemini/user/private/zhaotianhao/data/MERGED_DATASET_count_200_2000_100000/train
+  cache_dir: /gemini/user/private/zhaotianhao/dataset_cache/MERGED_DATASET_count_200_2000_100000_128to256_819200_head
+  renders_dir: None
+  filter_active_voxels: false
+  cache_filter_path: None
+  base_resolution: 1024
+  min_resolution: 128
+  n_train_samples: 1024
+  sample_type: dora
+flow:
+  "resolution": 128
+  "in_channels": 16
+  "out_channels": 16
+  "model_channels": 768
+  "cond_channels": 1024
+  "num_blocks": 8
+  "num_heads": 8
+  "mlp_ratio": 4
+  "patch_size": 2
+  "num_io_res_blocks": 2
+  "io_block_channels": [128]
+  "pe_mode": "rope"
+  "qk_rms_norm": true
+  "qk_rms_norm_cross": false
+  "use_fp16": false

dataset_triposf.py ADDED Viewed

	@@ -0,0 +1,924 @@

+import torch
+import os
+from typing import *
+import trimesh
+import numpy as np
+from torch.utils.data import Dataset
+import torch.nn.functional as F
+# from utils import quantize_vertices
+from utils import get_voxel_line
+import random
+import hashlib
+import json
+from tqdm import tqdm
+from torchvision import transforms
+from PIL import Image
+import rembg
+import open3d as o3d
+from trimesh import grouping
+def normalize_mesh(mesh_path):
+    scene = trimesh.load(mesh_path, process=False, force='scene')
+    meshes = []
+    for node_name in scene.graph.nodes_geometry:
+        geom_name = scene.graph[node_name][1]
+        geometry = scene.geometry[geom_name]
+        transform = scene.graph[node_name][0]
+        if isinstance(geometry, trimesh.Trimesh):
+            geometry.apply_transform(transform)
+            meshes.append(geometry)
+    mesh = trimesh.util.concatenate(meshes)
+    center = mesh.bounding_box.centroid
+    mesh.apply_translation(-center)
+    scale = max(mesh.bounding_box.extents)
+    mesh.apply_scale(2.0 / scale * 0.5)
+    return mesh
+def quantize_vertices(vertices: torch.Tensor, res: int):
+    """
+    Quantize normalized vertices (range approx [-0.5, 0.5]) to integer grid [0, res-1].
+    """
+    normalized = vertices + 0.5
+    scaled = normalized * res
+    quantized = torch.floor(scaled).clamp(0, res - 1).int()
+    return quantized
+def sample_edges_dora(tm_mesh, n_samples):
+    adj_faces = tm_mesh.face_adjacency
+    adj_edges = tm_mesh.face_adjacency_edges
+    internal_data = None
+    if len(adj_faces) > 0:
+        n0 = tm_mesh.face_normals[adj_faces[:, 0]]
+        n1 = tm_mesh.face_normals[adj_faces[:, 1]]
+        sum_normals = n0 + n1
+        norms = np.linalg.norm(sum_normals, axis=1, keepdims=True)
+        norms[norms < 1e-6] = 1.0
+        int_normals = sum_normals / norms
+        int_v_start = tm_mesh.vertices[adj_edges[:, 0]]
+        int_v_end = tm_mesh.vertices[adj_edges[:, 1]]
+        faces_pair = tm_mesh.faces[adj_faces]
+        sum_face_indices = np.sum(faces_pair, axis=2)
+        sum_edge_indices = np.sum(adj_edges, axis=1)
+        unique_idx_0 = sum_face_indices[:, 0] - sum_edge_indices
+        unique_idx_1 = sum_face_indices[:, 1] - sum_edge_indices
+        v_unique_0 = tm_mesh.vertices[unique_idx_0]
+        v_unique_1 = tm_mesh.vertices[unique_idx_1]
+        int_v_virtual = (v_unique_0 + v_unique_1) * 0.5
+        internal_data = (int_v_start, int_v_end, int_normals, int_v_virtual)
+    edges_sorted = tm_mesh.edges_sorted
+    if len(edges_sorted) == 0:
+        boundary_data = None
+    else:
+        boundary_group = grouping.group_rows(edges_sorted, require_count=1)
+        boundary_data = None
+        if len(boundary_group) > 0:
+            boundary_indices = np.concatenate([np.atleast_1d(g) for g in boundary_group])
+            boundary_face_indices = boundary_indices // 3
+            bnd_normals = tm_mesh.face_normals[boundary_face_indices]
+            bnd_edge_v_indices = edges_sorted[boundary_indices]
+            bnd_v_start = tm_mesh.vertices[bnd_edge_v_indices[:, 0]]
+            bnd_v_end = tm_mesh.vertices[bnd_edge_v_indices[:, 1]]
+            boundary_face_v_indices = tm_mesh.faces[boundary_face_indices]
+            sum_face = np.sum(boundary_face_v_indices, axis=1)
+            sum_edge = np.sum(bnd_edge_v_indices, axis=1)
+            unique_idx = sum_face - sum_edge
+            bnd_v_virtual = tm_mesh.vertices[unique_idx]
+            boundary_data = (bnd_v_start, bnd_v_end, bnd_normals, bnd_v_virtual)
+    if internal_data is None and boundary_data is None:
+        return None, None, None
+    parts_start, parts_end, parts_norm, parts_virt = [], [], [], []
+    if internal_data is not None:
+        parts_start.append(internal_data[0])
+        parts_end.append(internal_data[1])
+        parts_norm.append(internal_data[2])
+        parts_virt.append(internal_data[3])
+    if boundary_data is not None:
+        parts_start.append(boundary_data[0])
+        parts_end.append(boundary_data[1])
+        parts_norm.append(boundary_data[2])
+        parts_virt.append(boundary_data[3])
+    if not parts_start:
+        return None, None, None
+    all_v_start = np.concatenate(parts_start, axis=0)
+    all_v_end = np.concatenate(parts_end, axis=0)
+    all_normals = np.concatenate(parts_norm, axis=0)
+    all_v_virtual = np.concatenate(parts_virt, axis=0)
+    edge_vectors = all_v_end - all_v_start
+    edge_lengths = np.linalg.norm(edge_vectors, axis=1)
+    total_length = np.sum(edge_lengths)
+    if total_length < 1e-9:
+        probs = np.ones(len(edge_lengths)) / len(edge_lengths)
+    else:
+        probs = edge_lengths / total_length
+        probs = probs / probs.sum()
+    chosen_indices = np.random.choice(len(edge_lengths), size=n_samples, p=probs)
+    t = np.random.rand(n_samples, 1)
+    sel_v_start = all_v_start[chosen_indices]
+    sel_v_end = all_v_end[chosen_indices]
+    sel_normals = all_normals[chosen_indices]
+    sel_v_virtual = all_v_virtual[chosen_indices]
+    sampled_points = sel_v_start + (sel_v_end - sel_v_start) * t
+    vertex_triplets = np.stack([sel_v_start, sel_v_end, sel_v_virtual], axis=1).astype(np.float32)
+    return sampled_points.astype(np.float32), sel_normals.astype(np.float32), vertex_triplets
+def load_quantized_mesh_dora(
+    mesh_path,
+    mesh_load=None,
+    volume_resolution=256,
+    use_normals=True,
+    pc_sample_number=4096000,
+    edge_sample_ratio=0.2
+):
+    """Voxelize a mesh and sample per-point features, mixing edge and surface samples.
+
+    Args:
+        mesh_path: File read with ``o3d.io.read_triangle_mesh`` when
+            ``mesh_load`` is None; not used otherwise.
+        mesh_load: Optional already-loaded mesh exposing ``vertices`` and
+            ``faces``.
+        volume_resolution: The voxel size is ``1 / volume_resolution`` inside
+            the bounds [-0.5, 0.5] on every axis.
+        use_normals: Whether normals are appended to the per-point features.
+        pc_sample_number: Total number of points to sample.
+        edge_sample_ratio: Fraction of ``pc_sample_number`` requested from
+            ``sample_edges_dora``; the remainder is sampled on the surface.
+
+    Returns:
+        ``(voxels, points_sample)``: the unique voxel grid indices wrapped in
+        a ``torch.Tensor`` and the per-point features concatenated along the
+        last axis (position, optional normal, three point-to-vertex offsets).
+    """
+    # The 27 offsets of a 3x3x3 neighbourhood, scaled by
+    # 1 / (4 * volume_resolution - 1); they are added to every sampled point
+    # before the point-cloud voxelization below.
+    cube_dilate = np.array([
+        [0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 0, -1], [0, -1, 0], [0, 1, 1], [0, -1, 1], [0, 1, -1], [0, -1, -1],
+        [1, 0, 0], [1, 0, 1], [1, 1, 0], [1, 0, -1], [1, -1, 0], [1, 1, 1], [1, -1, 1], [1, 1, -1], [1, -1, -1],
+        [-1, 0, 0], [-1, 0, 1], [-1, 1, 0], [-1, 0, -1], [-1, -1, 0], [-1, 1, 1], [-1, -1, 1], [-1, 1, -1], [-1, -1, -1]
+    ]) / (volume_resolution * 4 - 1)
+    # Either way the vertices are clipped to stay strictly inside the
+    # [-0.5, 0.5] bounds and an Open3D mesh is built for voxelization.
+    if mesh_load is None:
+        mesh_o3d = o3d.io.read_triangle_mesh(mesh_path)
+        vertices = np.clip(np.asarray(mesh_o3d.vertices), -0.5 + 1e-6, 0.5 - 1e-6)
+        faces = np.asarray(mesh_o3d.triangles)
+        mesh_o3d.vertices = o3d.utility.Vector3dVector(vertices)
+    else:
+        vertices = np.clip(np.asarray(mesh_load.vertices), -0.5 + 1e-6, 0.5 - 1e-6)
+        faces = np.asarray(mesh_load.faces)
+        mesh_o3d = o3d.geometry.TriangleMesh()
+        mesh_o3d.vertices = o3d.utility.Vector3dVector(vertices)
+        mesh_o3d.triangles = o3d.utility.Vector3iVector(faces)
+    # trimesh copy of the same (clipped) geometry, used for all sampling.
+    tm_mesh = trimesh.Trimesh(vertices=vertices, faces=faces, process=False)
+    n_edge_samples = int(pc_sample_number * edge_sample_ratio)
+    p_edge, n_edge, triplets_edge = sample_edges_dora(tm_mesh, n_edge_samples)
+    # If no edge samples are available, the whole budget goes to the surface.
+    if p_edge is None:
+        # print('p_edge is none!')
+        n_surface_samples = pc_sample_number
+    else:
+        # print('p_edge is right!')
+        n_surface_samples = pc_sample_number - n_edge_samples
+    p_surf, idx_surf = tm_mesh.sample(n_surface_samples, return_index=True)
+    p_surf = p_surf.astype(np.float32)
+    n_surf = tm_mesh.face_normals[idx_surf].astype(np.float32)
+    # For every surface sample, the three vertices of the face it lies on.
+    v_indices_surf = faces[idx_surf]
+    triplets_surf = vertices[v_indices_surf]
+    if p_edge is None:
+        final_points = p_surf
+        final_normals = n_surf
+        final_triplets = triplets_surf
+    else:
+        # Surface samples first, edge samples appended after them.
+        final_points = np.concatenate([p_surf, p_edge.astype(np.float32)], axis=0)
+        # NOTE(review): with use_normals=False, final_normals stays unassigned
+        # in this branch; it is only read under `if use_normals` further down.
+        if use_normals:
+            final_normals = np.concatenate([n_surf, n_edge.astype(np.float32)], axis=0)
+        final_triplets = np.concatenate([triplets_surf, triplets_edge], axis=0)
+    # Voxels touched by the mesh triangles themselves.
+    voxelization_mesh = o3d.geometry.VoxelGrid.create_from_triangle_mesh_within_bounds(
+            mesh_o3d,
+            voxel_size=1. / volume_resolution,
+            min_bound=[-0.5, -0.5, -0.5],
+            max_bound=[0.5, 0.5, 0.5]
+        )
+    voxel_mesh = np.asarray([voxel.grid_index for voxel in voxelization_mesh.get_voxels()])
+    # Voxels touched by the sampled points after adding each of the 27
+    # offsets (broadcast to 27 x N x 3, flattened and clipped to the bounds).
+    voxelization_points = o3d.geometry.VoxelGrid.create_from_point_cloud_within_bounds(
+            o3d.geometry.PointCloud(
+                o3d.utility.Vector3dVector(
+                    np.clip(
+                        (final_points[np.newaxis] + cube_dilate[..., np.newaxis, :]).reshape(-1, 3),
+                        -0.5 + 1e-6, 0.5 - 1e-6)
+                    )
+                ),
+            voxel_size=1. / volume_resolution,
+            min_bound=[-0.5, -0.5, -0.5],
+            max_bound=[0.5, 0.5, 0.5]
+        )
+    voxel_points = np.asarray([voxel.grid_index for voxel in voxelization_points.get_voxels()])
+    # Union of both voxel sets with duplicate rows removed.
+    voxels = torch.Tensor(np.unique(np.concatenate([voxel_mesh, voxel_points]), axis=0))
+    features_list = [torch.from_numpy(final_points)]
+    if use_normals:
+        features_list.append(torch.from_numpy(final_normals))
+    # Reorder the three vertices of every triplet: each vertex (3 coordinates)
+    # is viewed as one opaque void item so argsort can order the vertices
+    # within a triplet. Presumably the comparison is on the packed bytes,
+    # giving a deterministic rather than numeric order -- TODO confirm.
+    view_dtype = np.dtype((np.void, final_triplets.dtype.itemsize * final_triplets.shape[-1]))
+    v_view = final_triplets.view(view_dtype).squeeze(-1)
+    sort_idx = np.argsort(v_view, axis=1)
+    batch_indices = np.arange(final_triplets.shape[0])[:, None]
+    v_sorted = final_triplets[batch_indices, sort_idx]
+    v1 = v_sorted[:, 0, :]
+    v2 = v_sorted[:, 1, :]
+    v3 = v_sorted[:, 2, :]
+    # Offsets from each sampled point to the three (reordered) vertices.
+    dir1 = v1 - final_points
+    dir2 = v2 - final_points
+    dir3 = v3 - final_points
+    features_list.append(torch.Tensor(dir1.astype(np.float32)))
+    features_list.append(torch.Tensor(dir2.astype(np.float32)))
+    features_list.append(torch.Tensor(dir3.astype(np.float32)))
+    points_sample = torch.cat(features_list, axis=-1)
+    return voxels, points_sample
+def load_quantized_mesh_original(
+    mesh_path,
+    mesh_load=None,
+    volume_resolution=256,
+    use_normals=True,
+    pc_sample_number=4096000,
+):
+    """Voxelize a mesh and sample per-point features from its surface only.
+
+    Args:
+        mesh_path: File read with ``o3d.io.read_triangle_mesh`` when
+            ``mesh_load`` is None; not used otherwise.
+        mesh_load: Optional already-loaded mesh exposing ``vertices`` and
+            ``faces``.
+        volume_resolution: The voxel size is ``1 / volume_resolution`` inside
+            the bounds [-0.5, 0.5] on every axis.
+        use_normals: Whether triangle normals are appended to the features.
+        pc_sample_number: Number of surface points to sample.
+
+    Returns:
+        ``(voxels, points_sample)``: the unique voxel grid indices wrapped in
+        a ``torch.Tensor`` and the per-point features concatenated along the
+        last axis (position, optional normal, three point-to-vertex offsets).
+    """
+    # The 27 offsets of a 3x3x3 neighbourhood, scaled by
+    # 1 / (4 * volume_resolution - 1); they are added to every sampled point
+    # before the point-cloud voxelization below.
+    cube_dilate = np.array(
+            [
+                [0, 0, 0],
+                [0, 0, 1],
+                [0, 1, 0],
+                [0, 0, -1],
+                [0, -1, 0],
+                [0, 1, 1],
+                [0, -1, 1],
+                [0, 1, -1],
+                [0, -1, -1],
+                [1, 0, 0],
+                [1, 0, 1],
+                [1, 1, 0],
+                [1, 0, -1],
+                [1, -1, 0],
+                [1, 1, 1],
+                [1, -1, 1],
+                [1, 1, -1],
+                [1, -1, -1],
+                [-1, 0, 0],
+                [-1, 0, 1],
+                [-1, 1, 0],
+                [-1, 0, -1],
+                [-1, -1, 0],
+                [-1, 1, 1],
+                [-1, -1, 1],
+                [-1, 1, -1],
+                [-1, -1, -1],
+            ]
+        ) / (volume_resolution * 4 - 1)
+    # Either way the vertices are clipped to stay strictly inside the
+    # [-0.5, 0.5] bounds and an Open3D mesh is built for voxelization.
+    if mesh_load is None:
+        mesh = o3d.io.read_triangle_mesh(mesh_path)
+        vertices = np.clip(np.asarray(mesh.vertices), -0.5 + 1e-6, 0.5 - 1e-6)
+        faces = np.asarray(mesh.triangles)
+        mesh.vertices = o3d.utility.Vector3dVector(vertices)
+    else:
+        vertices = np.clip(np.asarray(mesh_load.vertices), -0.5 + 1e-6, 0.5 - 1e-6)
+        faces = np.asarray(mesh_load.faces)
+        mesh = o3d.geometry.TriangleMesh()
+        mesh.vertices = o3d.utility.Vector3dVector(vertices)
+        mesh.triangles = o3d.utility.Vector3iVector(faces)
+    # Voxels touched by the mesh triangles themselves.
+    voxelization_mesh = o3d.geometry.VoxelGrid.create_from_triangle_mesh_within_bounds(
+            mesh,
+            voxel_size=1. / volume_resolution,
+            min_bound=[-0.5, -0.5, -0.5],
+            max_bound=[0.5, 0.5, 0.5]
+        )
+    voxel_mesh = np.asarray([voxel.grid_index for voxel in voxelization_mesh.get_voxels()])
+    # Surface sampling via trimesh; element 0 holds the points and element 1
+    # the index of the face each point was drawn from.
+    points_normals_sample = trimesh.Trimesh(vertices=vertices, faces=faces).sample(count=pc_sample_number, return_index=True)
+    points_xyz_np = points_normals_sample[0].astype(np.float32)
+    # Second float32 copy of the same points; used for the dilation below and
+    # rebound to the final feature tensor at the end.
+    points_sample = points_normals_sample[0].astype(np.float32)
+    face_indices = points_normals_sample[1]
+    # Voxels touched by the sampled points after adding each of the 27
+    # offsets (broadcast to 27 x N x 3, flattened and clipped to the bounds).
+    voxelization_points = o3d.geometry.VoxelGrid.create_from_point_cloud_within_bounds(
+            o3d.geometry.PointCloud(
+                o3d.utility.Vector3dVector(
+                    np.clip(
+                        (points_sample[np.newaxis] + cube_dilate[..., np.newaxis, :]).reshape(-1, 3),
+                        -0.5 + 1e-6, 0.5 - 1e-6)
+                    )
+                ),
+            voxel_size=1. / volume_resolution,
+            min_bound=[-0.5, -0.5, -0.5],
+            max_bound=[0.5, 0.5, 0.5]
+        )
+    voxel_points = np.asarray([voxel.grid_index for voxel in voxelization_points.get_voxels()])
+    # Union of both voxel sets with duplicate rows removed.
+    voxels = torch.Tensor(np.unique(np.concatenate([voxel_mesh, voxel_points]), axis=0))
+    features_list = [torch.from_numpy(points_xyz_np)]
+    if use_normals:
+        # Per-point normal = normal of the triangle the point was sampled from.
+        mesh.compute_triangle_normals()
+        normals_sample = np.asarray(
+                            mesh.triangle_normals
+                        )[points_normals_sample[1]].astype(np.float32)
+        # points_sample = torch.cat((torch.Tensor(points_sample), torch.Tensor(normals_sample)), axis=-1)
+        features_list.append(torch.from_numpy(normals_sample))
+    ########################################
+    # add direction to three vtx
+    ########################################
+    ## wo sort
+    # sampled_face_v_indices = faces[face_indices]
+    # v1 = vertices[sampled_face_v_indices[:, 0]]
+    # v2 = vertices[sampled_face_v_indices[:, 1]]
+    # v3 = vertices[sampled_face_v_indices[:, 2]]
+    # w sort
+    # Gather the three vertices of each sampled face, then reorder them within
+    # the face through a void view (one opaque item per vertex). Presumably the
+    # comparison is on the packed bytes, giving a deterministic rather than
+    # numeric order -- TODO confirm.
+    sampled_face_v_indices = faces[face_indices]
+    v_batch = np.stack([
+        vertices[sampled_face_v_indices[:, 0]],
+        vertices[sampled_face_v_indices[:, 1]],
+        vertices[sampled_face_v_indices[:, 2]]
+    ], axis=1)
+    view_dtype = np.dtype((np.void, v_batch.dtype.itemsize * v_batch.shape[-1]))
+    v_view = v_batch.view(view_dtype).squeeze(-1) # becomes an (N, 3) array of void items
+    sort_idx = np.argsort(v_view, axis=1) # (N, 3)
+    batch_indices = np.arange(v_batch.shape[0])[:, None]
+    v_sorted = v_batch[batch_indices, sort_idx] # (N, 3, 3)
+    v1 = v_sorted[:, 0, :]
+    v2 = v_sorted[:, 1, :]
+    v3 = v_sorted[:, 2, :]
+    # --------------------
+    # Offsets from each sampled point to the three (reordered) face vertices.
+    dir1 = v1 - points_xyz_np
+    dir2 = v2 - points_xyz_np
+    dir3 = v3 - points_xyz_np
+    features_list.append(torch.Tensor(dir1.astype(np.float32)))
+    features_list.append(torch.Tensor(dir2.astype(np.float32)))
+    features_list.append(torch.Tensor(dir3.astype(np.float32)))
+    points_sample = torch.cat(features_list, axis=-1)
+    ########################################
+    # add direction to three vtx
+    ########################################
+    return voxels, points_sample
+def get_sha256(filepath: str) -> str:
+    sha256_hash = hashlib.sha256()
+    with open(filepath, "rb") as f:
+        for byte_block in iter(lambda: f.read(4096), b""):
+            sha256_hash.update(byte_block)
+    return sha256_hash.hexdigest()
+class VoxelVertexDataset_edge(Dataset):
+    def __init__(self,
+                root_dir: str,
+                base_resolution: int = 256,
+                min_resolution: int = 128,
+                img_res: int = 518,
+                cache_dir: str = "dataset_cache_test",
+                renders_dir: str = '/HOME/paratera_xy/pxy1054/HDD_POOL/Trisf/data/mesh_render_img/objaverse_200_2000/renders_cond',
+                process_img: bool = False,
+                n_pre_samples: int = 1024,
+                active_voxel_res: int = 64,
+                pc_sample_number: int = 409600,
+                filter_active_voxels: bool = False, #####
+                min_active_voxels: int = 2000,
+                max_active_voxels: int = 40000,
+                cache_filter_path: str = "/HOME/paratera_xy/pxy1054/HDD_POOL/Triposf/data/filter_name/objaverse_200_2000_2000min_25000max.txt",
+                sample_type: str = 'uniform',
+        ):
+        self.root_dir = root_dir
+        self.cache_dir = cache_dir
+        self.img_res = img_res
+        self.renders_dir = renders_dir
+        self.process_img = process_img
+        self.filter_active_voxels=filter_active_voxels
+        self.min_active_voxels=min_active_voxels
+        self.max_active_voxels=max_active_voxels
+        self.active_voxel_res = active_voxel_res
+        self.pc_sample_number = pc_sample_number
+        self.sample_type = sample_type
+        # self.image_transform = transforms.ToTensor()
+        self.image_transform = transforms.Compose([
+            transforms.ToTensor(),
+            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+        ])
+        os.makedirs(cache_dir, exist_ok=True)
+        assert (base_resolution & (base_resolution - 1)) == 0, "Resolution must be power of 2"
+        assert (min_resolution & (min_resolution - 1)) == 0, "Resolution must be power of 2"
+        self.res_levels = [
+            2**i for i in range(
+                int(np.log2(min_resolution)),
+                int(np.log2(base_resolution)) + 1
+            )
+        ]
+        if self.active_voxel_res is not None and self.active_voxel_res not in self.res_levels:
+            self.res_levels.append(active_voxel_res)
+            self.res_levels.sort()
+        all_obj_files = sorted([f for f in os.listdir(root_dir) if f.endswith(('.obj', '.ply', '.glb'))])
+        if not all_obj_files:
+            raise ValueError(f"No OBJ files found in {root_dir}")
+        if self.process_img:
+            map_file_path = os.path.join(os.path.dirname(self.renders_dir), 'map.json')
+            if os.path.exists(map_file_path):
+                print(f"Loading pre-computed hash map from {map_file_path}")
+                with open(map_file_path, 'r') as f:
+                    file_map = json.load(f)
+                filename_to_hash = {item['filename']: item['sha256'] for item in file_map}
+                all_obj_hashes = [filename_to_hash.get(fname) for fname in all_obj_files]
+            else:
+                print("No hash map found. Calculating SHA256 hashes on the fly... (This may take a moment)")
+                all_obj_hashes = []
+                for fname in tqdm(all_obj_files, desc="Hashing .obj files"):
+                    fpath = os.path.join(self.root_dir, fname)
+                    all_obj_hashes.append(get_sha256(fpath))
+        else:
+            print("process_img is False, skipping SHA256 hash calculation.")
+            all_obj_hashes = [None] * len(all_obj_files)
+        if self.filter_active_voxels and cache_filter_path:
+            filtered_list_cache_path = cache_filter_path
+            if os.path.exists(filtered_list_cache_path):
+                print(f"Loading filtered BASENAMES from: {filtered_list_cache_path}")
+                basename_to_fullname_map = {os.path.splitext(f)[0]: f for f in all_obj_files}
+                with open(filtered_list_cache_path, 'r') as f:
+                    filtered_basenames = [line.strip() for line in f if line.strip()]
+                self.obj_files = []
+                for basename in filtered_basenames:
+                    if basename in basename_to_fullname_map:
+                        self.obj_files.append(basename_to_fullname_map[basename])
+                    else:
+                        print(f"[WARN] Basename '{basename}' from filter list not found in directory '{self.root_dir}'. Skipping.")
+                file_to_hash_map = dict(zip(all_obj_files, all_obj_hashes))
+                self.obj_hashes = [file_to_hash_map.get(fname) for fname in self.obj_files] # 使用 .get 更安全
+                print(f"Loaded and matched {len(self.obj_files)} samples from the filter list.")
+            else:
+                print(f"Cache filter file not found: {filtered_list_cache_path}. Proceeding with on-the-fly filtering...")
+        else:
+            self.obj_files = all_obj_files
+            self.obj_hashes = all_obj_hashes
+        if not self.obj_files:
+            raise ValueError(f"No OBJ files found in {root_dir}")
+        self.rembg_session = None
+    def _init_rembg_session_if_needed(self):
+        if self.rembg_session is None:
+            print(f"Initializing rembg session for worker {os.getpid()}...")
+            self.rembg_session = rembg.new_session(model_name='u2net')
+    def preprocess_image(self, input: Image.Image) -> Image.Image:
+        self._init_rembg_session_if_needed()
+        has_alpha = False
+        if input.mode == 'RGBA':
+            alpha = np.array(input)[:, :, 3]
+            if not np.all(alpha == 255):
+                has_alpha = True
+        if has_alpha:
+            output = input
+        else:
+            input = input.convert('RGB')
+            max_size = max(input.size)
+            scale = min(1, 1024 / max_size)
+            if scale < 1:
+                input = input.resize((int(input.width * scale), int(input.height * scale)), Image.Resampling.LANCZOS)
+            if getattr(self, 'rembg_session', None) is None:
+                self.rembg_session = rembg.new_session('u2net')
+            output = rembg.remove(input, session=self.rembg_session)
+        output_np = np.array(output)
+        alpha = output_np[:, :, 3]
+        bbox = np.argwhere(alpha > 0.8 * 255)
+        bbox = np.min(bbox[:, 1]), np.min(bbox[:, 0]), np.max(bbox[:, 1]), np.max(bbox[:, 0])
+        center = (bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2
+        size = max(bbox[2] - bbox[0], bbox[3] - bbox[1])
+        size = int(size * 1.2)
+        bbox = center[0] - size // 2, center[1] - size // 2, center[0] + size // 2, center[1] + size // 2
+        output = output.crop(bbox)  # type: ignore
+        output = output.resize((518, 518), Image.Resampling.LANCZOS)
+        output = np.array(output).astype(np.float32) / 255
+        output = output[:, :, :3] * output[:, :, 3:4]
+        output = Image.fromarray((output * 255).astype(np.uint8))
+        return output
+    def __getitem__(self, idx: int) -> Dict[str, torch.Tensor]:
+        name = os.path.splitext(self.obj_files[idx])[0]
+        cache_path = os.path.join(self.cache_dir, f"{name}_precombined.npz")
+        sha256_hash = self.obj_hashes[idx]
+        mesh_render_dir = os.path.join(self.renders_dir, sha256_hash) if sha256_hash else ""
+        image_path = ''
+        if mesh_render_dir and os.path.isdir(mesh_render_dir):
+            try:
+                render_files = [f for f in os.listdir(mesh_render_dir) if f.endswith('.png')]
+                if render_files:
+                    image_path = os.path.join(mesh_render_dir, random.choice(render_files))
+            except OSError as e:
+                print(f"[WARN] Could not access render directory {mesh_render_dir}: {e}")
+        if self.process_img:
+            try:
+                if image_path and os.path.exists(image_path):
+                    image_obj = self.image_transform(self.preprocess_image(Image.open(image_path)).convert('RGB'))
+                else:
+                    image_obj = self.image_transform(Image.fromarray(np.zeros((self.img_res, self.img_res, 3), dtype=np.uint8)).convert('RGB'))
+            except Exception as e:
+                image_obj = self.image_transform(Image.fromarray(np.zeros((self.img_res, self.img_res, 3), dtype=np.uint8)).convert('RGB'))
+                print(f'Error processing image {image_path}: {e}')
+        if os.path.exists(cache_path):
+            try:
+                loaded = np.load(cache_path, allow_pickle=True)
+                data = {
+                    'original_faces': torch.from_numpy(loaded['original_faces']),
+                    'original_vertices': torch.from_numpy(loaded['original_vertices']),
+                }
+                for res in self.res_levels:
+                    # Load standard voxel data
+                    if f'combined_voxels_{res}' in loaded:
+                        data[f'combined_voxels_{res}'] = torch.from_numpy(loaded[f'combined_voxels_{res}'])
+                        data[f'combined_voxel_labels_{res}'] = torch.from_numpy(loaded[f'combined_voxel_labels_{res}'])
+                        data[f'gt_combined_endpoints_{res}'] = torch.from_numpy(loaded[f'gt_combined_endpoints_{res}'])
+                        data[f'gt_vertex_voxels_{res}'] = torch.from_numpy(loaded[f'gt_vertex_voxels_{res}'])
+                        data[f'gt_edge_voxels_{res}'] = torch.from_numpy(loaded[f'gt_edge_voxels_{res}'])
+                        data[f'gt_edge_endpoints_{res}'] = torch.from_numpy(loaded[f'gt_edge_endpoints_{res}'])
+                        data[f'gt_edge_errors_{res}'] = torch.from_numpy(loaded[f'gt_edge_errors_{res}'])
+                    # Load Active Voxels and Point Cloud for Local Pooling
+                    if res == self.active_voxel_res:
+                        if f'active_voxels_{res}' in loaded:
+                            data[f'active_voxels_{res}'] = torch.from_numpy(loaded[f'active_voxels_{res}'])
+                        if f'point_cloud_{res}' in loaded:
+                            data[f'point_cloud_{res}'] = torch.from_numpy(loaded[f'point_cloud_{res}'])
+                if self.process_img:
+                    data['image'] = image_obj
+                data['image_path'] = image_path
+                return data
+            except Exception as e:
+                print(f"[WARN] Corrupted NPZ cache {cache_path}, regenerating... {e}")
+                os.remove(cache_path)
+        try:
+            mesh_path = os.path.join(self.root_dir, self.obj_files[idx])
+            mesh = normalize_mesh(mesh_path)
+            if mesh.is_empty or not hasattr(mesh.vertices, 'shape') or mesh.vertices.shape[0] < 3 or not hasattr(mesh.faces, 'shape') or mesh.faces.shape[0] < 1:
+                raise ValueError("Invalid or empty mesh")
+        except Exception as e:
+            print(f"[ERROR] Failed to load mesh: {self.obj_files[idx]} | {e}")
+            return self.__getitem__((idx + 1) % len(self))
+        vertices = torch.tensor(mesh.vertices, dtype=torch.float32)
+        faces = torch.tensor(mesh.faces, dtype=torch.long)
+        data = {'original_faces': faces.clone(), 'original_vertices': vertices.clone()}
+        for res in self.res_levels:
+            quantized = quantize_vertices(vertices, res)
+            tmesh = trimesh.Trimesh(vertices=quantized.numpy(), faces=faces.numpy())
+            tmesh.merge_vertices()
+            vertex_voxels_raw = torch.from_numpy(tmesh.vertices.astype(np.int32))
+            edges_raw = tmesh.edges_unique
+            vertex_labels_raw = torch.zeros(vertex_voxels_raw.shape[0], dtype=torch.long)
+            all_edge_voxels = []
+            edge_endpoints = []
+            edge_errors = []
+            for u_idx, v_idx in edges_raw:
+                p1_grid, p2_grid = vertex_voxels_raw[u_idx].float(), vertex_voxels_raw[v_idx].float()
+                v, ep, err = get_voxel_line(p1_grid, p2_grid)
+                all_edge_voxels.extend(v)
+                edge_endpoints.extend(ep)
+                edge_errors.extend(err)
+            if all_edge_voxels:
+                edge_voxels_np = np.array(all_edge_voxels, dtype=np.int32)
+                edge_endpoints_np = np.array([np.stack(pair) for pair in edge_endpoints], dtype=np.float32)
+                edge_errors_np = np.array(edge_errors, dtype=np.float32)
+                unique_edge_voxels_np, first_indices = np.unique(edge_voxels_np, axis=0, return_index=True)
+                edge_voxels_raw = torch.from_numpy(unique_edge_voxels_np)
+                edge_labels_raw = torch.ones(len(edge_voxels_raw), dtype=torch.long)
+                edge_endpoints_raw = torch.from_numpy(edge_endpoints_np[first_indices])
+                edge_errors_raw = torch.from_numpy(edge_errors_np[first_indices])
+            else:
+                edge_voxels_raw = torch.empty(0, 3, dtype=torch.int32)
+                edge_labels_raw = torch.empty(0, dtype=torch.long)
+                edge_endpoints_raw = torch.empty(0, 2, 3, dtype=torch.float32)
+                edge_errors_raw = torch.empty(0, 3, dtype=torch.float32)
+            if res == self.active_voxel_res:
+                try:
+                    if self.sample_type == 'uniform':
+                        # triposf-style, normilize wrong
+                        ts_voxels, ts_points = load_quantized_mesh_original(
+                            mesh_path=os.path.join(self.root_dir, self.obj_files[idx]),
+                            mesh_load=mesh,
+                            volume_resolution=res,
+                            use_normals=True,
+                            pc_sample_number=self.pc_sample_number,
+                        )
+                    else:
+                        ts_voxels, ts_points = load_quantized_mesh_dora(
+                            mesh_path=os.path.join(self.root_dir, self.obj_files[idx]),
+                            mesh_load=mesh,
+                            volume_resolution=res,
+                            use_normals=True,
+                            pc_sample_number=self.pc_sample_number,
+                            edge_sample_ratio=0.5,
+                        )
+                    # Convert types
+                    # Voxels from TripoSF are float Tensor (N, 3), convert to int32
+                    data[f'active_voxels_{res}'] = ts_voxels.int()
+                    data[f'point_cloud_{res}'] = ts_points
+                except Exception as e:
+                    print(f"[ERROR] Failed to compute active voxels/points for {name} at res {res}: {e}")
+                    data[f'active_voxels_{res}'] = torch.empty(0, 3, dtype=torch.int32)
+                    data[f'point_cloud_{res}'] = torch.empty(0, 6, dtype=torch.float32)
+            combined_voxels = torch.cat([vertex_voxels_raw, edge_voxels_raw], dim=0)
+            combined_labels = torch.cat([vertex_labels_raw, edge_labels_raw], dim=0)
+            if combined_voxels.numel() > 0:
+                unique_voxels, inverse_indices = torch.unique(combined_voxels, dim=0, return_inverse=True)
+                zero_mask = (combined_labels == 0)
+                if zero_mask.any():
+                    zero_per_unique = torch.zeros(len(unique_voxels), dtype=torch.bool)
+                    zero_per_unique.scatter_(0, inverse_indices[zero_mask], True)
+                    final_combined_labels = torch.where(zero_per_unique, 0, 1).long()
+                else:
+                    final_combined_labels = torch.ones(len(unique_voxels), dtype=torch.long)
+                if edge_voxels_raw.numel() > 0:
+                    edge_endpoint_map = {tuple(coord): ep for coord, ep in zip(edge_voxels_raw.numpy(), edge_endpoints_raw.numpy())}
+                    endpoints_arr = np.empty((len(unique_voxels), 2, 3), dtype=np.float32)
+                    unique_voxels_np = unique_voxels.numpy()
+                    for j, coord in enumerate(unique_voxels_np):
+                        coord_tuple = tuple(coord)
+                        if coord_tuple in edge_endpoint_map:
+                            endpoints_arr[j] = edge_endpoint_map[coord_tuple]
+                        else:
+                            endpoints_arr[j, 0, :] = coord
+                            endpoints_arr[j, 1, :] = coord
+                    final_combined_endpoints = torch.from_numpy(endpoints_arr)
+                else:
+                    final_combined_endpoints = unique_voxels.float().unsqueeze(1).repeat(1, 2, 1)
+            else:
+                unique_voxels = torch.empty(0, 3, dtype=torch.int32)
+                final_combined_labels = torch.empty(0, dtype=torch.long)
+                final_combined_endpoints = torch.empty(0, 2, 3, dtype=torch.float32)
+            data[f'combined_voxels_{res}'] = unique_voxels
+            data[f'combined_voxel_labels_{res}'] = final_combined_labels
+            data[f'gt_combined_endpoints_{res}'] = final_combined_endpoints.reshape(-1, 6)
+            data[f'gt_vertex_voxels_{res}'] = vertex_voxels_raw
+            data[f'gt_edge_voxels_{res}'] = edge_voxels_raw
+            data[f'gt_edge_endpoints_{res}'] = edge_endpoints_raw.reshape(-1, 6)
+            data[f'gt_edge_errors_{res}'] = edge_errors_raw
+        save_dict = {
+            'original_faces': data['original_faces'].numpy(),
+            'original_vertices': data['original_vertices'].numpy(),
+        }
+        for res in self.res_levels:
+            for key_suffix in [
+                'combined_voxels', 'combined_voxel_labels', 'gt_combined_endpoints',
+                'gt_vertex_voxels', 'gt_edge_voxels', 'gt_edge_endpoints', 'gt_edge_errors',
+            ]:
+                full_key = f'{key_suffix}_{res}'
+                if full_key in data:
+                    save_dict[full_key] = data[full_key].numpy()
+            if f'active_voxels_{res}' in data:
+                save_dict[f'active_voxels_{res}'] = data[f'active_voxels_{res}'].numpy()
+            if f'point_cloud_{res}' in data:
+                save_dict[f'point_cloud_{res}'] = data[f'point_cloud_{res}'].numpy()
+        try:
+            np.savez_compressed(cache_path, **save_dict)
+        except Exception as e:
+            print(f"[ERROR] Failed to save cache {cache_path}: {e}")
+            if os.path.exists(cache_path): os.remove(cache_path)
+        if self.process_img:
+            data['image'] = image_obj
+        data['image_path'] = image_path
+        return data
+    def __len__(self) -> int:
+        return len(self.obj_files)
+def collate_fn_pointnet(
+    batch: List[Dict[str, torch.Tensor]],
+) -> Dict[str, torch.Tensor]:
+    if not batch:
+        return {}
+    batch = [b for b in batch if b is not None]
+    if not batch:
+        return {}
+    collated = {
+        'original_faces':    [b['original_faces']    for b in batch],
+        'original_vertices': [b['original_vertices'] for b in batch],
+        'image_path':        [b['image_path']        for b in batch],
+    }
+    if 'image' in batch[0] and batch[0]['image'] is not None:
+        collated['image'] = torch.stack([b['image'] for b in batch])
+    res_levels = []
+    for k in batch[0].keys():
+        if k.startswith('gt_vertex_voxels_'):
+            try:
+                res_levels.append(int(k.split('_')[-1]))
+            except ValueError:
+                pass
+    res_levels.sort()
+    for res in res_levels:
+        all_active_voxels_list = []
+        all_point_clouds_list = []
+        all_combined_voxels_list = []
+        all_combined_labels_list = []
+        all_vertex_voxels_only   = []
+        all_edge_voxels_only     = []
+        all_edge_endpoints_only  = []
+        all_combined_endpoints   = []
+        all_combined_errors_list = []
+        layout = []
+        vtx_offset = 0
+        adj_flat_offset = 0
+        start_idx = 0
+        # Attempt to find device from first tensor
+        device = torch.device('cpu')
+        for v in batch[0].values():
+            if isinstance(v, torch.Tensor):
+                device = v.device
+                break
+        for i, sample in enumerate(batch):
+            vertex_voxels = sample.get(f'gt_vertex_voxels_{res}', torch.empty(0,3,dtype=torch.int32)).to(device)
+            vertex_labels = torch.zeros(vertex_voxels.shape[0], dtype=torch.long, device=device)
+            edge_voxels   = sample.get(f'gt_edge_voxels_{res}',   torch.empty(0,3,dtype=torch.int32)).to(device)
+            edge_labels   = torch.ones(edge_voxels.shape[0], dtype=torch.long, device=device)
+            edge_endpoints= sample.get(f'gt_edge_endpoints_{res}', torch.empty(0,6,dtype=torch.float32)).to(device)
+            edge_errors   = sample.get(f'gt_edge_errors_{res}',   torch.empty(0,3,dtype=torch.float32)).to(device)
+            vertex_errors = sample.get(f'gt_vertex_errors_{res}', torch.zeros_like(vertex_voxels, dtype=torch.float32)).to(device)
+            if vertex_voxels.numel() > 0:
+                idx = torch.full((vertex_voxels.shape[0],1), i, dtype=torch.int32, device=device)
+                all_vertex_voxels_only.append(torch.cat([idx, vertex_voxels], dim=1))
+            if edge_voxels.numel() > 0:
+                idx = torch.full((edge_voxels.shape[0],1), i, dtype=torch.int32, device=device)
+                all_edge_voxels_only.append(torch.cat([idx, edge_voxels], dim=1))
+                all_edge_endpoints_only.append(
+                    torch.cat([idx.to(torch.float32), edge_endpoints], dim=1))
+            if vertex_voxels.numel() + edge_voxels.numel() > 0:
+                combined_voxels = torch.cat([vertex_voxels, edge_voxels], dim=0)
+                combined_labels = torch.cat([vertex_labels, edge_labels], dim=0)
+                endpoints = torch.zeros(combined_voxels.size(0), 6, dtype=torch.float32, device=device)
+                if edge_voxels.numel() > 0:
+                    endpoints[-edge_voxels.size(0):] = edge_endpoints
+                if vertex_voxels.numel() > 0:
+                    endpoints[:vertex_voxels.size(0)] = vertex_voxels.repeat(1,2).float()
+                combined_errors = torch.cat([vertex_errors, edge_errors], dim=0)
+                batch_idx_int = torch.full((combined_voxels.shape[0],1), i, dtype=torch.int32, device=device)
+                all_combined_voxels_list.append(torch.cat([batch_idx_int, combined_voxels], dim=1))
+                all_combined_labels_list.append(combined_labels)
+                batch_idx_float = batch_idx_int.to(torch.float32)
+                all_combined_endpoints.append(torch.cat([batch_idx_float, endpoints], dim=1))
+                all_combined_errors_list.append(torch.cat([batch_idx_float, combined_errors], dim=1))
+                layout.append(slice(start_idx, start_idx + combined_voxels.shape[0]))
+                start_idx += combined_voxels.shape[0]
+            else:
+                layout.append(slice(start_idx, start_idx))
+            # Active Voxels (Sparse Coords)
+            active_voxels = sample.get(f'active_voxels_{res}', torch.empty(0, 3, dtype=torch.int32)).to(device)
+            if active_voxels.numel() > 0:
+                idx = torch.full((active_voxels.shape[0], 1), i, dtype=torch.int32, device=device)
+                all_active_voxels_list.append(torch.cat([idx, active_voxels], dim=1))
+            # ==========================================
+            # Modified Section: Collect Point Clouds
+            # ==========================================
+            # pc = sample.get(f'point_cloud_{res}', torch.empty(0, 6, dtype=torch.float32)).to(device)
+            pc = sample.get(f'point_cloud_{res}', torch.empty(0, 15, dtype=torch.float32)).to(device)
+            # We expect all samples to have point clouds if res == active_voxel_res
+            if pc.numel() > 0:
+                all_point_clouds_list.append(pc)
+        collated[f'layout_{res}'] = layout
+        def cat_or_empty(lst, shape, dtype):
+            return torch.cat(lst, dim=0) if lst else torch.empty(shape, dtype=dtype, device=device)
+        collated[f'combined_voxels_{res}']         = cat_or_empty(all_combined_voxels_list,(0,4),torch.int32)
+        collated[f'combined_voxel_labels_{res}']   = cat_or_empty(all_combined_labels_list,(0,),torch.long)
+        collated[f'gt_vertex_voxels_{res}']        = cat_or_empty(all_vertex_voxels_only,(0,4),torch.int32)
+        collated[f'gt_edge_voxels_{res}']          = cat_or_empty(all_edge_voxels_only,(0,4),torch.int32)
+        collated[f'gt_edge_endpoints_{res}']       = cat_or_empty(all_edge_endpoints_only,(0,7),torch.float32)
+        collated[f'gt_combined_endpoints_{res}']   = cat_or_empty(all_combined_endpoints,(0,7),torch.float32)
+        collated[f'gt_combined_errors_{res}']      = cat_or_empty(all_combined_errors_list,(0,4),torch.float32)
+        collated[f'active_voxels_{res}'] = cat_or_empty(all_active_voxels_list, (0, 4), torch.int32)
+        if all_point_clouds_list:
+            collated[f'point_cloud_{res}'] = torch.stack(all_point_clouds_list, dim=0)
+        else:
+            # collated[f'point_cloud_{res}'] = torch.empty((0, 6), dtype=torch.float32, device=device)
+            collated[f'point_cloud_{res}'] = torch.empty((0, 15), dtype=torch.float32, device=device)
+    return collated

dataset_triposf_head.py ADDED Viewed

	@@ -0,0 +1,1000 @@

+import torch
+import os
+from typing import *
+import trimesh
+import numpy as np
+from torch.utils.data import Dataset
+import torch.nn.functional as F
+# from utils import quantize_vertices
+from utils import get_voxel_line
+import random
+import hashlib
+import json
+from tqdm import tqdm
+from torchvision import transforms
+from PIL import Image
+import rembg
+import open3d as o3d
+from trimesh import grouping
+def normalize_mesh(mesh_path):
+    scene = trimesh.load(mesh_path, process=False, force='scene')
+    meshes = []
+    for node_name in scene.graph.nodes_geometry:
+        geom_name = scene.graph[node_name][1]
+        geometry = scene.geometry[geom_name]
+        transform = scene.graph[node_name][0]
+        if isinstance(geometry, trimesh.Trimesh):
+            geometry.apply_transform(transform)
+            meshes.append(geometry)
+    mesh = trimesh.util.concatenate(meshes)
+    center = mesh.bounding_box.centroid
+    mesh.apply_translation(-center)
+    scale = max(mesh.bounding_box.extents)
+    mesh.apply_scale(2.0 / scale * 0.5)
+    return mesh
+def quantize_vertices(vertices: torch.Tensor, res: int):
+    """
+    Quantize normalized vertices (range approx [-0.5, 0.5]) to integer grid [0, res-1].
+    """
+    normalized = vertices + 0.5
+    scaled = normalized * res
+    quantized = torch.floor(scaled).clamp(0, res - 1).int()
+    return quantized
+def sample_edges_dora(tm_mesh, n_samples):
+    adj_faces = tm_mesh.face_adjacency
+    adj_edges = tm_mesh.face_adjacency_edges
+    internal_data = None
+    if len(adj_faces) > 0:
+        n0 = tm_mesh.face_normals[adj_faces[:, 0]]
+        n1 = tm_mesh.face_normals[adj_faces[:, 1]]
+        sum_normals = n0 + n1
+        norms = np.linalg.norm(sum_normals, axis=1, keepdims=True)
+        norms[norms < 1e-6] = 1.0
+        int_normals = sum_normals / norms
+        int_v_start = tm_mesh.vertices[adj_edges[:, 0]]
+        int_v_end = tm_mesh.vertices[adj_edges[:, 1]]
+        faces_pair = tm_mesh.faces[adj_faces]
+        sum_face_indices = np.sum(faces_pair, axis=2)
+        sum_edge_indices = np.sum(adj_edges, axis=1)
+        unique_idx_0 = sum_face_indices[:, 0] - sum_edge_indices
+        unique_idx_1 = sum_face_indices[:, 1] - sum_edge_indices
+        v_unique_0 = tm_mesh.vertices[unique_idx_0]
+        v_unique_1 = tm_mesh.vertices[unique_idx_1]
+        int_v_virtual = (v_unique_0 + v_unique_1) * 0.5
+        internal_data = (int_v_start, int_v_end, int_normals, int_v_virtual)
+    edges_sorted = tm_mesh.edges_sorted
+    if len(edges_sorted) == 0:
+        boundary_data = None
+    else:
+        boundary_group = grouping.group_rows(edges_sorted, require_count=1)
+        boundary_data = None
+        if len(boundary_group) > 0:
+            boundary_indices = np.concatenate([np.atleast_1d(g) for g in boundary_group])
+            boundary_face_indices = boundary_indices // 3
+            bnd_normals = tm_mesh.face_normals[boundary_face_indices]
+            bnd_edge_v_indices = edges_sorted[boundary_indices]
+            bnd_v_start = tm_mesh.vertices[bnd_edge_v_indices[:, 0]]
+            bnd_v_end = tm_mesh.vertices[bnd_edge_v_indices[:, 1]]
+            boundary_face_v_indices = tm_mesh.faces[boundary_face_indices]
+            sum_face = np.sum(boundary_face_v_indices, axis=1)
+            sum_edge = np.sum(bnd_edge_v_indices, axis=1)
+            unique_idx = sum_face - sum_edge
+            bnd_v_virtual = tm_mesh.vertices[unique_idx]
+            boundary_data = (bnd_v_start, bnd_v_end, bnd_normals, bnd_v_virtual)
+    if internal_data is None and boundary_data is None:
+        return None, None, None
+    parts_start, parts_end, parts_norm, parts_virt = [], [], [], []
+    if internal_data is not None:
+        parts_start.append(internal_data[0])
+        parts_end.append(internal_data[1])
+        parts_norm.append(internal_data[2])
+        parts_virt.append(internal_data[3])
+    if boundary_data is not None:
+        parts_start.append(boundary_data[0])
+        parts_end.append(boundary_data[1])
+        parts_norm.append(boundary_data[2])
+        parts_virt.append(boundary_data[3])
+    if not parts_start:
+        return None, None, None
+    all_v_start = np.concatenate(parts_start, axis=0)
+    all_v_end = np.concatenate(parts_end, axis=0)
+    all_normals = np.concatenate(parts_norm, axis=0)
+    all_v_virtual = np.concatenate(parts_virt, axis=0)
+    edge_vectors = all_v_end - all_v_start
+    edge_lengths = np.linalg.norm(edge_vectors, axis=1)
+    total_length = np.sum(edge_lengths)
+    if total_length < 1e-9:
+        probs = np.ones(len(edge_lengths)) / len(edge_lengths)
+    else:
+        probs = edge_lengths / total_length
+        probs = probs / probs.sum()
+    chosen_indices = np.random.choice(len(edge_lengths), size=n_samples, p=probs)
+    t = np.random.rand(n_samples, 1)
+    sel_v_start = all_v_start[chosen_indices]
+    sel_v_end = all_v_end[chosen_indices]
+    sel_normals = all_normals[chosen_indices]
+    sel_v_virtual = all_v_virtual[chosen_indices]
+    sampled_points = sel_v_start + (sel_v_end - sel_v_start) * t
+    vertex_triplets = np.stack([sel_v_start, sel_v_end, sel_v_virtual], axis=1).astype(np.float32)
+    return sampled_points.astype(np.float32), sel_normals.astype(np.float32), vertex_triplets
+def sample_edges_hunyuan(tm_mesh, n_samples):
+    if tm_mesh.vertex_normals is None:
+        tm_mesh.compute_vertex_normals()
+    V = tm_mesh.vertices
+    F = tm_mesh.faces
+    VN = tm_mesh.vertex_normals
+    # Edge 0: v0 -> v1 (Virtual: v2)
+    # Edge 1: v1 -> v2 (Virtual: v0)
+    # Edge 2: v2 -> v0 (Virtual: v1)
+    idx_start = np.concatenate([F[:, 0], F[:, 1], F[:, 2]])
+    idx_end   = np.concatenate([F[:, 1], F[:, 2], F[:, 0]])
+    idx_virt  = np.concatenate([F[:, 2], F[:, 0], F[:, 1]])
+    v_start = V[idx_start]
+    v_end   = V[idx_end]
+    edge_vectors = v_end - v_start
+    edge_lengths = np.linalg.norm(edge_vectors, axis=1)
+    total_length = np.sum(edge_lengths)
+    if total_length < 1e-9:
+        probs = np.ones(len(edge_lengths)) / len(edge_lengths)
+    else:
+        probs = edge_lengths / total_length
+    chosen_indices = np.random.choice(len(edge_lengths), size=n_samples, p=probs)
+    sel_v_start = v_start[chosen_indices]
+    sel_v_end   = v_end[chosen_indices]
+    sel_v_virt  = V[idx_virt[chosen_indices]]
+    sel_vn_start = VN[idx_start[chosen_indices]]
+    sel_vn_end   = VN[idx_end[chosen_indices]]
+    t = np.random.rand(n_samples, 1)
+    sampled_points = sel_v_start + (sel_v_end - sel_v_start) * t
+    sampled_normals = sel_vn_start * (1 - t) + sel_vn_end * t
+    norm_vals = np.linalg.norm(sampled_normals, axis=1, keepdims=True)
+    norm_vals[norm_vals < 1e-6] = 1.0
+    sampled_normals = sampled_normals / norm_vals
+    vertex_triplets = np.stack([sel_v_start, sel_v_end, sel_v_virt], axis=1)
+    return sampled_points.astype(np.float32), sampled_normals.astype(np.float32), vertex_triplets.astype(np.float32)
def load_quantized_mesh_dora(
    mesh_path,
    mesh_load=None,
    volume_resolution=256,
    use_normals=True,
    pc_sample_number=4096000,
    edge_sample_ratio=0.2
):
    """Voxelise a mesh and sample a point cloud mixing surface and edge samples.

    Args:
        mesh_path: Path handed to ``o3d.io.read_triangle_mesh``; only used when
            ``mesh_load`` is ``None``.
        mesh_load: Optional already-loaded mesh exposing ``.vertices`` and
            ``.faces``. When given, ``mesh_path`` is ignored.
        volume_resolution: Number of voxels per axis of the [-0.5, 0.5]^3 grid.
        use_normals: If true, a 3-channel normal is inserted after the xyz
            channels of every sampled point.
        pc_sample_number: Total number of points to sample.
        edge_sample_ratio: Fraction of ``pc_sample_number`` requested from
            ``sample_edges_dora``; the remainder is sampled on the surface.

    Returns:
        ``(voxels, points_sample)`` where ``voxels`` is a float tensor of unique
        occupied grid indices (union of the triangle voxelisation and the
        voxelisation of the dilated point cloud) and ``points_sample`` is the
        per-point feature tensor ``[xyz, (normal), dir1, dir2, dir3]``; the
        ``dir*`` channels are offsets from the point to the three vertices of
        its source triplet.

    Raises:
        ValueError: If the mesh has no vertices or no faces.
    """
    # Offsets of the 3x3x3 neighbourhood, scaled to roughly a quarter voxel, used
    # to dilate every sampled point before voxelising the point cloud.
    cube_dilate = np.array([
        [0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 0, -1], [0, -1, 0], [0, 1, 1], [0, -1, 1], [0, 1, -1], [0, -1, -1],
        [1, 0, 0], [1, 0, 1], [1, 1, 0], [1, 0, -1], [1, -1, 0], [1, 1, 1], [1, -1, 1], [1, 1, -1], [1, -1, -1],
        [-1, 0, 0], [-1, 0, 1], [-1, 1, 0], [-1, 0, -1], [-1, -1, 0], [-1, 1, 1], [-1, -1, 1], [-1, 1, -1], [-1, -1, -1]
    ]) / (volume_resolution * 4 - 1)

    # Vertices are clamped strictly inside the unit cube so that every vertex
    # lies within the voxelisation bounds used below.
    if mesh_load is None:
        mesh_o3d = o3d.io.read_triangle_mesh(mesh_path)
        vertices = np.clip(np.asarray(mesh_o3d.vertices), -0.5 + 1e-6, 0.5 - 1e-6)
        faces = np.asarray(mesh_o3d.triangles)
        mesh_o3d.vertices = o3d.utility.Vector3dVector(vertices)
    else:
        vertices = np.clip(np.asarray(mesh_load.vertices), -0.5 + 1e-6, 0.5 - 1e-6)
        faces = np.asarray(mesh_load.faces)
        mesh_o3d = o3d.geometry.TriangleMesh()
        mesh_o3d.vertices = o3d.utility.Vector3dVector(vertices)
        mesh_o3d.triangles = o3d.utility.Vector3iVector(faces)

    # Fail early with a readable message instead of an obscure error from the
    # samplers further down.
    if len(vertices) == 0 or len(faces) == 0:
        raise ValueError(f"Mesh has no vertices or faces: {mesh_path}")

    tm_mesh = trimesh.Trimesh(vertices=vertices, faces=faces, process=False)

    # Edge samples first; the surface sampler fills whatever budget is left.
    n_edge_samples = int(pc_sample_number * edge_sample_ratio)
    p_edge, n_edge, triplets_edge = sample_edges_dora(tm_mesh, n_edge_samples)
    if p_edge is None:
        n_surface_samples = pc_sample_number
    else:
        n_surface_samples = pc_sample_number - n_edge_samples

    p_surf, idx_surf = tm_mesh.sample(n_surface_samples, return_index=True)
    p_surf = p_surf.astype(np.float32)
    n_surf = tm_mesh.face_normals[idx_surf].astype(np.float32)
    # (n_surface_samples, 3, 3): the three corner positions of each sampled face.
    triplets_surf = vertices[faces[idx_surf]]

    if p_edge is None:
        final_points = p_surf
        final_normals = n_surf
        final_triplets = triplets_surf
    else:
        final_points = np.concatenate([p_surf, p_edge.astype(np.float32)], axis=0)
        if use_normals:
            final_normals = np.concatenate([n_surf, n_edge.astype(np.float32)], axis=0)
        final_triplets = np.concatenate([triplets_surf, triplets_edge], axis=0)

    voxelization_mesh = o3d.geometry.VoxelGrid.create_from_triangle_mesh_within_bounds(
        mesh_o3d,
        voxel_size=1. / volume_resolution,
        min_bound=[-0.5, -0.5, -0.5],
        max_bound=[0.5, 0.5, 0.5]
    )
    # reshape(-1, 3) keeps the array two-dimensional even when the grid is
    # empty, so the concatenate below cannot fail on a shape mismatch.
    voxel_mesh = np.asarray(
        [voxel.grid_index for voxel in voxelization_mesh.get_voxels()], dtype=np.int32
    ).reshape(-1, 3)

    dilated_points = np.clip(
        (final_points[np.newaxis] + cube_dilate[..., np.newaxis, :]).reshape(-1, 3),
        -0.5 + 1e-6, 0.5 - 1e-6)
    voxelization_points = o3d.geometry.VoxelGrid.create_from_point_cloud_within_bounds(
        o3d.geometry.PointCloud(o3d.utility.Vector3dVector(dilated_points)),
        voxel_size=1. / volume_resolution,
        min_bound=[-0.5, -0.5, -0.5],
        max_bound=[0.5, 0.5, 0.5]
    )
    voxel_points = np.asarray(
        [voxel.grid_index for voxel in voxelization_points.get_voxels()], dtype=np.int32
    ).reshape(-1, 3)

    # torch.Tensor(...) yields a float tensor; callers convert to int as needed.
    voxels = torch.Tensor(np.unique(np.concatenate([voxel_mesh, voxel_points]), axis=0))

    features_list = [torch.from_numpy(final_points)]
    if use_normals:
        features_list.append(torch.from_numpy(final_normals))

    # Put the three vertices of every triplet into a canonical order so the
    # direction channels do not depend on the face winding. Each xyz row is
    # viewed as one opaque void scalar so argsort can order whole vertices; the
    # resulting order is whatever NumPy defines for raw void values, not a
    # numeric xyz order.
    view_dtype = np.dtype((np.void, final_triplets.dtype.itemsize * final_triplets.shape[-1]))
    v_view = final_triplets.view(view_dtype).squeeze(-1)
    sort_idx = np.argsort(v_view, axis=1)
    batch_indices = np.arange(final_triplets.shape[0])[:, None]
    v_sorted = final_triplets[batch_indices, sort_idx]

    # Offsets from each sampled point to its three (sorted) triplet vertices.
    for corner in range(3):
        direction = v_sorted[:, corner, :] - final_points
        features_list.append(torch.Tensor(direction.astype(np.float32)))

    points_sample = torch.cat(features_list, axis=-1)
    return voxels, points_sample
def load_quantized_mesh_original(
    mesh_path,
    mesh_load=None,
    volume_resolution=256,
    use_normals=True,
    pc_sample_number=4096000,
):
    """Voxelise a mesh and sample a surface point cloud with per-point features.

    Args:
        mesh_path: Path handed to ``o3d.io.read_triangle_mesh``; only used when
            ``mesh_load`` is ``None``.
        mesh_load: Optional already-loaded mesh exposing ``.vertices`` and
            ``.faces``. When given, ``mesh_path`` is ignored.
        volume_resolution: Number of voxels per axis of the [-0.5, 0.5]^3 grid.
        use_normals: If true, the normal of the sampled triangle is inserted
            after the xyz channels of every point.
        pc_sample_number: Number of surface points to sample.

    Returns:
        ``(voxels, points_sample)`` where ``voxels`` is a float tensor of unique
        occupied grid indices (union of the triangle voxelisation and the
        voxelisation of the dilated point cloud) and ``points_sample`` is the
        per-point feature tensor ``[xyz, (normal), dir1, dir2, dir3]``; the
        ``dir*`` channels are offsets from the point to the three vertices of
        the triangle it was sampled from.

    Raises:
        ValueError: If the mesh has no vertices or no faces.
    """
    # Offsets of the 3x3x3 neighbourhood, scaled to roughly a quarter voxel, used
    # to dilate every sampled point before voxelising the point cloud.
    cube_dilate = np.array(
            [
                [0, 0, 0],
                [0, 0, 1],
                [0, 1, 0],
                [0, 0, -1],
                [0, -1, 0],
                [0, 1, 1],
                [0, -1, 1],
                [0, 1, -1],
                [0, -1, -1],
                [1, 0, 0],
                [1, 0, 1],
                [1, 1, 0],
                [1, 0, -1],
                [1, -1, 0],
                [1, 1, 1],
                [1, -1, 1],
                [1, 1, -1],
                [1, -1, -1],
                [-1, 0, 0],
                [-1, 0, 1],
                [-1, 1, 0],
                [-1, 0, -1],
                [-1, -1, 0],
                [-1, 1, 1],
                [-1, -1, 1],
                [-1, 1, -1],
                [-1, -1, -1],
            ]
        ) / (volume_resolution * 4 - 1)

    # Vertices are clamped strictly inside the unit cube so that every vertex
    # lies within the voxelisation bounds used below.
    if mesh_load is None:
        mesh = o3d.io.read_triangle_mesh(mesh_path)
        vertices = np.clip(np.asarray(mesh.vertices), -0.5 + 1e-6, 0.5 - 1e-6)
        faces = np.asarray(mesh.triangles)
        mesh.vertices = o3d.utility.Vector3dVector(vertices)
    else:
        vertices = np.clip(np.asarray(mesh_load.vertices), -0.5 + 1e-6, 0.5 - 1e-6)
        faces = np.asarray(mesh_load.faces)
        mesh = o3d.geometry.TriangleMesh()
        mesh.vertices = o3d.utility.Vector3dVector(vertices)
        mesh.triangles = o3d.utility.Vector3iVector(faces)

    # Fail early with a readable message instead of an obscure error from the
    # sampler further down.
    if len(vertices) == 0 or len(faces) == 0:
        raise ValueError(f"Mesh has no vertices or faces: {mesh_path}")

    voxelization_mesh = o3d.geometry.VoxelGrid.create_from_triangle_mesh_within_bounds(
        mesh,
        voxel_size=1. / volume_resolution,
        min_bound=[-0.5, -0.5, -0.5],
        max_bound=[0.5, 0.5, 0.5]
    )
    # reshape(-1, 3) keeps the array two-dimensional even when the grid is
    # empty, so the concatenate below cannot fail on a shape mismatch.
    voxel_mesh = np.asarray(
        [voxel.grid_index for voxel in voxelization_mesh.get_voxels()], dtype=np.int32
    ).reshape(-1, 3)

    # NOTE(review): this Trimesh is built with trimesh's default processing,
    # while the face indices it returns are used to index `faces` and the
    # Open3D triangle normals below. That presumes processing never drops or
    # reorders faces -- confirm, or pass process=False as the dora variant does.
    sampled_xyz, face_indices = trimesh.Trimesh(vertices=vertices, faces=faces).sample(
        count=pc_sample_number, return_index=True)
    points_xyz_np = sampled_xyz.astype(np.float32)

    dilated_points = np.clip(
        (points_xyz_np[np.newaxis] + cube_dilate[..., np.newaxis, :]).reshape(-1, 3),
        -0.5 + 1e-6, 0.5 - 1e-6)
    voxelization_points = o3d.geometry.VoxelGrid.create_from_point_cloud_within_bounds(
        o3d.geometry.PointCloud(o3d.utility.Vector3dVector(dilated_points)),
        voxel_size=1. / volume_resolution,
        min_bound=[-0.5, -0.5, -0.5],
        max_bound=[0.5, 0.5, 0.5]
    )
    voxel_points = np.asarray(
        [voxel.grid_index for voxel in voxelization_points.get_voxels()], dtype=np.int32
    ).reshape(-1, 3)

    # torch.Tensor(...) yields a float tensor; callers convert to int as needed.
    voxels = torch.Tensor(np.unique(np.concatenate([voxel_mesh, voxel_points]), axis=0))

    features_list = [torch.from_numpy(points_xyz_np)]
    if use_normals:
        mesh.compute_triangle_normals()
        normals_sample = np.asarray(mesh.triangle_normals)[face_indices].astype(np.float32)
        features_list.append(torch.from_numpy(normals_sample))

    # ---- direction from every sampled point to the three triangle vertices ----
    sampled_face_v_indices = faces[face_indices]
    v_batch = np.stack([
        vertices[sampled_face_v_indices[:, 0]],
        vertices[sampled_face_v_indices[:, 1]],
        vertices[sampled_face_v_indices[:, 2]]
    ], axis=1)

    # Put the three vertices of every triangle into a canonical order so the
    # direction channels do not depend on the face winding. Each xyz row is
    # viewed as one opaque void scalar, giving an (N, 3) void array that argsort
    # can order per triangle; the resulting order is whatever NumPy defines for
    # raw void values, not a numeric xyz order.
    view_dtype = np.dtype((np.void, v_batch.dtype.itemsize * v_batch.shape[-1]))
    v_view = v_batch.view(view_dtype).squeeze(-1)  # (N, 3) void
    sort_idx = np.argsort(v_view, axis=1)  # (N, 3)
    batch_indices = np.arange(v_batch.shape[0])[:, None]
    v_sorted = v_batch[batch_indices, sort_idx]  # (N, 3, 3)

    for corner in range(3):
        direction = v_sorted[:, corner, :] - points_xyz_np
        features_list.append(torch.Tensor(direction.astype(np.float32)))

    points_sample = torch.cat(features_list, axis=-1)
    return voxels, points_sample
def get_sha256(filepath: str) -> str:
    """Return the hexadecimal SHA-256 digest of the file at *filepath*.

    The file is streamed through one reusable 1 MiB buffer, so arbitrarily
    large files are hashed in constant memory and without allocating a new
    ``bytes`` object per chunk.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    sha256_hash = hashlib.sha256()
    buffer = memoryview(bytearray(1024 * 1024))
    with open(filepath, "rb") as f:
        while True:
            n_read = f.readinto(buffer)
            if not n_read:  # 0 bytes read means end of file
                break
            sha256_hash.update(buffer[:n_read])
    return sha256_hash.hexdigest()
class VoxelVertexDataset_edge(Dataset):
    """Mesh dataset producing multi-resolution vertex/edge voxel targets.

    Every item is a dict holding the normalised mesh (``original_vertices``,
    ``original_faces``) and, for each resolution in ``self.res_levels``, the
    quantised vertex voxels, the voxels traversed by every unique edge (with
    their endpoints and errors as returned by ``get_voxel_line``) and the merged
    vertex+edge voxel set with labels (0 = vertex voxel, 1 = edge-only voxel).
    At ``active_voxel_res`` the item additionally carries the active voxels and
    a sampled point cloud. An optional conditioning image is attached when
    ``process_img`` is true.
    """

    def __init__(self,
                root_dir: str,
                base_resolution: int = 256,
                min_resolution: int = 128,
                img_res: int = 518,
                cache_dir: str = "dataset_cache_test",
                renders_dir: str = '/HOME/paratera_xy/pxy1054/HDD_POOL/Trisf/data/mesh_render_img/objaverse_200_2000/renders_cond',
                process_img: bool = False,
                n_pre_samples: int = 1024,
                active_voxel_res: int = 64,
                pc_sample_number: int = 409600,
                filter_active_voxels: bool = False,
                min_active_voxels: int = 2000,
                max_active_voxels: int = 40000,
                cache_filter_path: str = "/HOME/paratera_xy/pxy1054/HDD_POOL/Triposf/data/filter_name/objaverse_200_2000_2000min_25000max.txt",
                sample_type: str = 'uniform',
        ):
        """Index the mesh files below ``root_dir`` and set up preprocessing.

        Args:
            root_dir: Directory scanned (non-recursively) for .obj/.ply/.glb files.
            base_resolution: Highest voxel resolution; must be a power of two.
            min_resolution: Lowest voxel resolution; must be a power of two.
            img_res: Side length of the conditioning image.
            cache_dir: Directory holding ``<name>_precombined.npz`` caches.
            renders_dir: Directory with one sub-folder of PNG renders per mesh,
                named by the mesh file's SHA-256.
            process_img: Whether to load and preprocess a conditioning image.
            n_pre_samples: Accepted for compatibility; not used by this class.
            active_voxel_res: Resolution at which active voxels and the point
                cloud are produced; appended to the resolution levels if missing.
            pc_sample_number: Number of points sampled for the point cloud.
            filter_active_voxels: Restrict the dataset to the basenames listed
                in ``cache_filter_path``.
            min_active_voxels: Stored on the instance; not used by this class.
            max_active_voxels: Stored on the instance; not used by this class.
            cache_filter_path: Text file with one mesh basename per line.
            sample_type: ``'uniform'`` selects ``load_quantized_mesh_original``;
                any other value selects ``load_quantized_mesh_dora``.

        Raises:
            ValueError: If no usable mesh file is found.
        """
        self.root_dir = root_dir
        self.cache_dir = cache_dir
        self.img_res = img_res
        self.renders_dir = renders_dir
        self.process_img = process_img
        self.filter_active_voxels = filter_active_voxels
        self.min_active_voxels = min_active_voxels
        self.max_active_voxels = max_active_voxels
        self.active_voxel_res = active_voxel_res
        self.pc_sample_number = pc_sample_number
        self.sample_type = sample_type
        # Created lazily by _init_rembg_session_if_needed (once per worker).
        self.rembg_session = None
        self.image_transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])
        os.makedirs(cache_dir, exist_ok=True)

        assert (base_resolution & (base_resolution - 1)) == 0, "Resolution must be power of 2"
        assert (min_resolution & (min_resolution - 1)) == 0, "Resolution must be power of 2"
        # All powers of two from min_resolution up to base_resolution, inclusive.
        self.res_levels = [
            2**i for i in range(
                int(np.log2(min_resolution)),
                int(np.log2(base_resolution)) + 1
            )
        ]
        if self.active_voxel_res is not None and self.active_voxel_res not in self.res_levels:
            self.res_levels.append(active_voxel_res)
            self.res_levels.sort()

        all_obj_files = sorted([f for f in os.listdir(root_dir) if f.endswith(('.obj', '.ply', '.glb'))])
        if not all_obj_files:
            raise ValueError(f"No OBJ files found in {root_dir}")

        # Hashes are only needed to locate the render folder of each mesh.
        if self.process_img:
            map_file_path = os.path.join(os.path.dirname(self.renders_dir), 'map.json')
            if os.path.exists(map_file_path):
                print(f"Loading pre-computed hash map from {map_file_path}")
                with open(map_file_path, 'r') as f:
                    file_map = json.load(f)
                filename_to_hash = {item['filename']: item['sha256'] for item in file_map}
                # Files missing from the map get None and thus no render folder.
                all_obj_hashes = [filename_to_hash.get(fname) for fname in all_obj_files]
            else:
                print("No hash map found. Calculating SHA256 hashes on the fly... (This may take a moment)")
                all_obj_hashes = []
                for fname in tqdm(all_obj_files, desc="Hashing .obj files"):
                    fpath = os.path.join(self.root_dir, fname)
                    all_obj_hashes.append(get_sha256(fpath))
        else:
            print("process_img is False, skipping SHA256 hash calculation.")
            all_obj_hashes = [None] * len(all_obj_files)

        use_filter_list = bool(self.filter_active_voxels and cache_filter_path)
        if use_filter_list and os.path.exists(cache_filter_path):
            print(f"Loading filtered BASENAMES from: {cache_filter_path}")
            basename_to_fullname_map = {os.path.splitext(f)[0]: f for f in all_obj_files}
            with open(cache_filter_path, 'r') as f:
                filtered_basenames = [line.strip() for line in f if line.strip()]
            self.obj_files = []
            for basename in filtered_basenames:
                if basename in basename_to_fullname_map:
                    self.obj_files.append(basename_to_fullname_map[basename])
                else:
                    print(f"[WARN] Basename '{basename}' from filter list not found in directory '{self.root_dir}'. Skipping.")
            file_to_hash_map = dict(zip(all_obj_files, all_obj_hashes))
            # .get keeps this safe even if a name is unexpectedly missing.
            self.obj_hashes = [file_to_hash_map.get(fname) for fname in self.obj_files]
            print(f"Loaded and matched {len(self.obj_files)} samples from the filter list.")
        else:
            if use_filter_list:
                # No on-the-fly filtering exists in this class, so use every
                # file rather than leaving obj_files/obj_hashes undefined.
                print(f"Cache filter file not found: {cache_filter_path}. Falling back to the unfiltered file list.")
            self.obj_files = all_obj_files
            self.obj_hashes = all_obj_hashes

        if not self.obj_files:
            raise ValueError(f"No OBJ files found in {root_dir}")

    def _init_rembg_session_if_needed(self):
        """Create the rembg background-removal session on first use."""
        if getattr(self, 'rembg_session', None) is None:
            print(f"Initializing rembg session for worker {os.getpid()}...")
            self.rembg_session = rembg.new_session(model_name='u2net')

    def preprocess_image(self, input: Image.Image) -> Image.Image:
        """Isolate the foreground, crop around it and resize to ``img_res``.

        Images that already carry a non-trivial alpha channel are used as is;
        otherwise the image is downscaled to at most 1024 px and its background
        is removed with rembg. The result is cropped to a square 1.2x the
        foreground bounding box, resized, and premultiplied by alpha.

        Raises:
            ValueError: If no pixel has alpha above 80 %.
        """
        has_alpha = False
        if input.mode == 'RGBA':
            alpha = np.array(input)[:, :, 3]
            has_alpha = not np.all(alpha == 255)

        if has_alpha:
            output = input
        else:
            # The segmentation model is only loaded when it is actually needed.
            self._init_rembg_session_if_needed()
            input = input.convert('RGB')
            max_size = max(input.size)
            scale = min(1, 1024 / max_size)
            if scale < 1:
                input = input.resize((int(input.width * scale), int(input.height * scale)), Image.Resampling.LANCZOS)
            output = rembg.remove(input, session=self.rembg_session)

        output_np = np.array(output)
        alpha = output_np[:, :, 3]
        # argwhere yields (row, col) pairs; the bbox is (x0, y0, x1, y1).
        foreground = np.argwhere(alpha > 0.8 * 255)
        if foreground.size == 0:
            raise ValueError("No foreground pixels found in image")
        bbox = np.min(foreground[:, 1]), np.min(foreground[:, 0]), np.max(foreground[:, 1]), np.max(foreground[:, 0])
        center = (bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2
        size = max(bbox[2] - bbox[0], bbox[3] - bbox[1])
        size = int(size * 1.2)
        bbox = center[0] - size // 2, center[1] - size // 2, center[0] + size // 2, center[1] + size // 2
        output = output.crop(bbox)  # type: ignore
        # Same side length as the blank placeholder so a batch can be stacked.
        output = output.resize((self.img_res, self.img_res), Image.Resampling.LANCZOS)
        output = np.array(output).astype(np.float32) / 255
        output = output[:, :, :3] * output[:, :, 3:4]  # premultiply by alpha
        output = Image.fromarray((output * 255).astype(np.uint8))
        return output

    def _pick_render_path(self, sha256_hash) -> str:
        """Return a random PNG from the mesh's render folder, or '' if none."""
        mesh_render_dir = os.path.join(self.renders_dir, sha256_hash) if sha256_hash else ""
        if not (mesh_render_dir and os.path.isdir(mesh_render_dir)):
            return ''
        try:
            render_files = [f for f in os.listdir(mesh_render_dir) if f.endswith('.png')]
        except OSError as e:
            print(f"[WARN] Could not access render directory {mesh_render_dir}: {e}")
            return ''
        if not render_files:
            return ''
        return os.path.join(mesh_render_dir, random.choice(render_files))

    def _blank_image_tensor(self):
        """Return the transformed all-black placeholder image."""
        blank = Image.fromarray(np.zeros((self.img_res, self.img_res, 3), dtype=np.uint8))
        return self.image_transform(blank.convert('RGB'))

    def _load_image_tensor(self, image_path: str):
        """Load and preprocess the image; fall back to the placeholder on any failure."""
        try:
            if image_path and os.path.exists(image_path):
                # The context manager closes the file once preprocessing has
                # produced its own in-memory image.
                with Image.open(image_path) as raw_image:
                    processed = self.preprocess_image(raw_image)
                return self.image_transform(processed.convert('RGB'))
            return self._blank_image_tensor()
        except Exception as e:
            print(f'Error processing image {image_path}: {e}')
            return self._blank_image_tensor()

    def _load_cached_sample(self, cache_path: str) -> Dict[str, torch.Tensor]:
        """Read a precombined NPZ cache into a dict of tensors."""
        # NOTE(security): allow_pickle=True lets a crafted cache file execute
        # arbitrary code on load; only point cache_dir at trusted files.
        with np.load(cache_path, allow_pickle=True) as loaded:
            data = {
                'original_faces': torch.from_numpy(loaded['original_faces']),
                'original_vertices': torch.from_numpy(loaded['original_vertices']),
            }
            for res in self.res_levels:
                # Standard voxel data.
                if f'combined_voxels_{res}' in loaded:
                    for key_prefix in (
                        'combined_voxels', 'combined_voxel_labels', 'gt_combined_endpoints',
                        'gt_vertex_voxels', 'gt_edge_voxels', 'gt_edge_endpoints', 'gt_edge_errors',
                    ):
                        data[f'{key_prefix}_{res}'] = torch.from_numpy(loaded[f'{key_prefix}_{res}'])
                # Active voxels and point cloud exist only at one resolution.
                if res == self.active_voxel_res:
                    if f'active_voxels_{res}' in loaded:
                        data[f'active_voxels_{res}'] = torch.from_numpy(loaded[f'active_voxels_{res}'])
                    if f'point_cloud_{res}' in loaded:
                        data[f'point_cloud_{res}'] = torch.from_numpy(loaded[f'point_cloud_{res}'])
                if f'gt_vertex_edge_indices_{res}' in loaded:
                    data[f'gt_vertex_edge_indices_{res}'] = torch.from_numpy(loaded[f'gt_vertex_edge_indices_{res}'])
        return data

    def _build_resolution_level(self, vertices, faces, res) -> Dict[str, torch.Tensor]:
        """Compute the vertex/edge voxel targets of one resolution level."""
        level = {}
        quantized = quantize_vertices(vertices, res)
        tmesh = trimesh.Trimesh(vertices=quantized.numpy(), faces=faces.numpy())
        tmesh.merge_vertices()
        vertex_voxels_raw = torch.from_numpy(tmesh.vertices.astype(np.int32))
        edges_raw = tmesh.edges_unique
        # np.int64 rather than np.long: that alias was removed in NumPy 1.24.
        level[f'gt_vertex_edge_indices_{res}'] = torch.from_numpy(edges_raw.astype(np.int64))
        vertex_labels_raw = torch.zeros(vertex_voxels_raw.shape[0], dtype=torch.long)

        # Rasterise every unique edge into the voxels it passes through.
        all_edge_voxels = []
        edge_endpoints = []
        edge_errors = []
        for u_idx, v_idx in edges_raw:
            p1_grid, p2_grid = vertex_voxels_raw[u_idx].float(), vertex_voxels_raw[v_idx].float()
            v, ep, err = get_voxel_line(p1_grid, p2_grid, mode='cpu')
            all_edge_voxels.extend(v)
            edge_endpoints.extend(ep)
            edge_errors.extend(err)

        if all_edge_voxels:
            edge_voxels_np = np.array(all_edge_voxels, dtype=np.int32)
            edge_endpoints_np = np.array([np.stack(pair) for pair in edge_endpoints], dtype=np.float32)
            edge_errors_np = np.array(edge_errors, dtype=np.float32)
            # A voxel crossed by several edges keeps the data of the first one.
            unique_edge_voxels_np, first_indices = np.unique(edge_voxels_np, axis=0, return_index=True)
            edge_voxels_raw = torch.from_numpy(unique_edge_voxels_np)
            edge_labels_raw = torch.ones(len(edge_voxels_raw), dtype=torch.long)
            edge_endpoints_raw = torch.from_numpy(edge_endpoints_np[first_indices])
            edge_errors_raw = torch.from_numpy(edge_errors_np[first_indices])
        else:
            edge_voxels_raw = torch.empty(0, 3, dtype=torch.int32)
            edge_labels_raw = torch.empty(0, dtype=torch.long)
            edge_endpoints_raw = torch.empty(0, 2, 3, dtype=torch.float32)
            edge_errors_raw = torch.empty(0, 3, dtype=torch.float32)

        combined_voxels = torch.cat([vertex_voxels_raw, edge_voxels_raw], dim=0)
        combined_labels = torch.cat([vertex_labels_raw, edge_labels_raw], dim=0)
        if combined_voxels.numel() > 0:
            unique_voxels, inverse_indices = torch.unique(combined_voxels, dim=0, return_inverse=True)
            # A voxel holding a vertex is labelled 0 even if an edge also
            # passes through it; all remaining voxels are labelled 1.
            zero_mask = (combined_labels == 0)
            if zero_mask.any():
                zero_per_unique = torch.zeros(len(unique_voxels), dtype=torch.bool)
                zero_per_unique.scatter_(0, inverse_indices[zero_mask], True)
                final_combined_labels = torch.where(zero_per_unique, 0, 1).long()
            else:
                final_combined_labels = torch.ones(len(unique_voxels), dtype=torch.long)

            if edge_voxels_raw.numel() > 0:
                edge_endpoint_map = {tuple(coord): ep for coord, ep in zip(edge_voxels_raw.numpy(), edge_endpoints_raw.numpy())}
                endpoints_arr = np.empty((len(unique_voxels), 2, 3), dtype=np.float32)
                unique_voxels_np = unique_voxels.numpy()
                for j, coord in enumerate(unique_voxels_np):
                    coord_tuple = tuple(coord)
                    if coord_tuple in edge_endpoint_map:
                        endpoints_arr[j] = edge_endpoint_map[coord_tuple]
                    else:
                        # Voxels without edge data use their own coordinate
                        # for both endpoints.
                        endpoints_arr[j, 0, :] = coord
                        endpoints_arr[j, 1, :] = coord
                final_combined_endpoints = torch.from_numpy(endpoints_arr)
            else:
                final_combined_endpoints = unique_voxels.float().unsqueeze(1).repeat(1, 2, 1)
        else:
            unique_voxels = torch.empty(0, 3, dtype=torch.int32)
            final_combined_labels = torch.empty(0, dtype=torch.long)
            final_combined_endpoints = torch.empty(0, 2, 3, dtype=torch.float32)

        level[f'combined_voxels_{res}'] = unique_voxels
        level[f'combined_voxel_labels_{res}'] = final_combined_labels
        level[f'gt_combined_endpoints_{res}'] = final_combined_endpoints.reshape(-1, 6)
        level[f'gt_vertex_voxels_{res}'] = vertex_voxels_raw
        level[f'gt_edge_voxels_{res}'] = edge_voxels_raw
        level[f'gt_edge_endpoints_{res}'] = edge_endpoints_raw.reshape(-1, 6)
        level[f'gt_edge_errors_{res}'] = edge_errors_raw
        return level

    def _sample_active_voxels(self, mesh, mesh_path, name, res) -> Dict[str, torch.Tensor]:
        """Compute active voxels and the point cloud; empty tensors on failure."""
        try:
            if self.sample_type == 'uniform':
                ts_voxels, ts_points = load_quantized_mesh_original(
                    mesh_path=mesh_path,
                    mesh_load=mesh,
                    volume_resolution=res,
                    use_normals=True,
                    pc_sample_number=self.pc_sample_number,
                )
            else:
                ts_voxels, ts_points = load_quantized_mesh_dora(
                    mesh_path=mesh_path,
                    mesh_load=mesh,
                    volume_resolution=res,
                    use_normals=True,
                    pc_sample_number=self.pc_sample_number,
                    edge_sample_ratio=0.5,
                )
            # The samplers return voxel indices as a float tensor.
            return {
                f'active_voxels_{res}': ts_voxels.int(),
                f'point_cloud_{res}': ts_points,
            }
        except Exception as e:
            print(f"[ERROR] Failed to compute active voxels/points for {name} at res {res}: {e}")
            # 15 = xyz (3) + normal (3) + three direction vectors (9), the
            # feature width the samplers produce with use_normals=True.
            return {
                f'active_voxels_{res}': torch.empty(0, 3, dtype=torch.int32),
                f'point_cloud_{res}': torch.empty(0, 15, dtype=torch.float32),
            }

    def __getitem__(self, idx: int) -> Dict[str, torch.Tensor]:
        """Return the sample at ``idx``, from the NPZ cache when one exists.

        If the mesh cannot be loaded the next index is returned instead
        (wrapping around), so a broken file does not abort an epoch.
        """
        name = os.path.splitext(self.obj_files[idx])[0]
        cache_path = os.path.join(self.cache_dir, f"{name}_precombined.npz")
        image_path = self._pick_render_path(self.obj_hashes[idx])
        image_obj = self._load_image_tensor(image_path) if self.process_img else None

        if os.path.exists(cache_path):
            try:
                data = self._load_cached_sample(cache_path)
            except Exception as e:
                print(f"[WARN] Corrupted NPZ cache {cache_path}, regenerating... {e}")
                try:
                    os.remove(cache_path)
                except OSError as remove_error:
                    # e.g. another worker removed it first, or a read-only dir.
                    print(f"[WARN] Could not remove cache {cache_path}: {remove_error}")
            else:
                if self.process_img:
                    data['image'] = image_obj
                data['image_path'] = image_path
                return data

        mesh_path = os.path.join(self.root_dir, self.obj_files[idx])
        try:
            mesh = normalize_mesh(mesh_path)
            if mesh.is_empty or not hasattr(mesh.vertices, 'shape') or mesh.vertices.shape[0] < 3 or not hasattr(mesh.faces, 'shape') or mesh.faces.shape[0] < 1:
                raise ValueError("Invalid or empty mesh")
        except Exception as e:
            print(f"[ERROR] Failed to load mesh: {self.obj_files[idx]} | {e}")
            return self.__getitem__((idx + 1) % len(self))

        vertices = torch.tensor(mesh.vertices, dtype=torch.float32)
        faces = torch.tensor(mesh.faces, dtype=torch.long)
        data = {'original_faces': faces.clone(), 'original_vertices': vertices.clone()}
        for res in self.res_levels:
            data.update(self._build_resolution_level(vertices, faces, res))
            if res == self.active_voxel_res:
                data.update(self._sample_active_voxels(mesh, mesh_path, name, res))

        # NOTE: freshly computed samples are not written back to cache_dir
        # here; only caches that already exist are read.
        if self.process_img:
            data['image'] = image_obj
        data['image_path'] = image_path
        return data

    def __len__(self) -> int:
        """Return the number of mesh files in the dataset."""
        return len(self.obj_files)
+def collate_fn_pointnet(
+    batch: List[Dict[str, torch.Tensor]],
+) -> Dict[str, torch.Tensor]:
+    if not batch:
+        return {}
+    batch = [b for b in batch if b is not None]
+    if not batch:
+        return {}
+    collated = {
+        'original_faces':    [b['original_faces']    for b in batch],
+        'original_vertices': [b['original_vertices'] for b in batch],
+        'image_path':        [b['image_path']        for b in batch],
+    }
+    if 'image' in batch[0] and batch[0]['image'] is not None:
+        collated['image'] = torch.stack([b['image'] for b in batch])
+    res_levels = []
+    for k in batch[0].keys():
+        if k.startswith('gt_vertex_voxels_'):
+            try:
+                res_levels.append(int(k.split('_')[-1]))
+            except ValueError:
+                pass
+    res_levels.sort()
+    for res in res_levels:
+        all_active_voxels_list = []
+        all_point_clouds_list = []
+        all_combined_voxels_list = []
+        all_combined_labels_list = []
+        all_vertex_voxels_only   = []
+        all_edge_voxels_only     = []
+        all_edge_endpoints_only  = []
+        all_combined_endpoints   = []
+        all_combined_errors_list = []
+        layout = []
+        vtx_offset = 0
+        adj_flat_offset = 0
+        start_idx = 0
+        # Attempt to find device from first tensor
+        device = torch.device('cpu')
+        for v in batch[0].values():
+            if isinstance(v, torch.Tensor):
+                device = v.device
+                break
+        all_edge_indices_list = []
+        vertex_count_offset = 0
+        for i, sample in enumerate(batch):
+            vertex_voxels = sample.get(f'gt_vertex_voxels_{res}', torch.empty(0,3,dtype=torch.int32)).to(device)
+            num_vertices = vertex_voxels.shape[0]
+            vertex_labels = torch.zeros(vertex_voxels.shape[0], dtype=torch.long, device=device)
+            edge_voxels   = sample.get(f'gt_edge_voxels_{res}',   torch.empty(0,3,dtype=torch.int32)).to(device)
+            edge_labels   = torch.ones(edge_voxels.shape[0], dtype=torch.long, device=device)
+            edge_endpoints= sample.get(f'gt_edge_endpoints_{res}', torch.empty(0,6,dtype=torch.float32)).to(device)
+            edge_errors   = sample.get(f'gt_edge_errors_{res}',   torch.empty(0,3,dtype=torch.float32)).to(device)
+            vertex_errors = sample.get(f'gt_vertex_errors_{res}', torch.zeros_like(vertex_voxels, dtype=torch.float32)).to(device)
+            if vertex_voxels.numel() > 0:
+                idx = torch.full((vertex_voxels.shape[0],1), i, dtype=torch.int32, device=device)
+                all_vertex_voxels_only.append(torch.cat([idx, vertex_voxels], dim=1))
+            edge_indices = sample.get(f'gt_vertex_edge_indices_{res}', torch.empty(0, 2, dtype=torch.long)).to(device)
+            if edge_indices.numel() > 0:
+                shifted_indices = edge_indices + vertex_count_offset
+                all_edge_indices_list.append(shifted_indices)
+            vertex_count_offset += num_vertices
+            if edge_voxels.numel() > 0:
+                idx = torch.full((edge_voxels.shape[0],1), i, dtype=torch.int32, device=device)
+                all_edge_voxels_only.append(torch.cat([idx, edge_voxels], dim=1))
+                all_edge_endpoints_only.append(
+                    torch.cat([idx.to(torch.float32), edge_endpoints], dim=1))
+            if vertex_voxels.numel() + edge_voxels.numel() > 0:
+                combined_voxels = torch.cat([vertex_voxels, edge_voxels], dim=0)
+                combined_labels = torch.cat([vertex_labels, edge_labels], dim=0)
+                endpoints = torch.zeros(combined_voxels.size(0), 6, dtype=torch.float32, device=device)
+                if edge_voxels.numel() > 0:
+                    endpoints[-edge_voxels.size(0):] = edge_endpoints
+                if vertex_voxels.numel() > 0:
+                    endpoints[:vertex_voxels.size(0)] = vertex_voxels.repeat(1,2).float()
+                combined_errors = torch.cat([vertex_errors, edge_errors], dim=0)
+                batch_idx_int = torch.full((combined_voxels.shape[0],1), i, dtype=torch.int32, device=device)
+                all_combined_voxels_list.append(torch.cat([batch_idx_int, combined_voxels], dim=1))
+                all_combined_labels_list.append(combined_labels)
+                batch_idx_float = batch_idx_int.to(torch.float32)
+                all_combined_endpoints.append(torch.cat([batch_idx_float, endpoints], dim=1))
+                all_combined_errors_list.append(torch.cat([batch_idx_float, combined_errors], dim=1))
+                layout.append(slice(start_idx, start_idx + combined_voxels.shape[0]))
+                start_idx += combined_voxels.shape[0]
+            else:
+                layout.append(slice(start_idx, start_idx))
+            # Active Voxels (Sparse Coords)
+            active_voxels = sample.get(f'active_voxels_{res}', torch.empty(0, 3, dtype=torch.int32)).to(device)
+            if active_voxels.numel() > 0:
+                idx = torch.full((active_voxels.shape[0], 1), i, dtype=torch.int32, device=device)
+                all_active_voxels_list.append(torch.cat([idx, active_voxels], dim=1))
+            # ==========================================
+            # Modified Section: Collect Point Clouds
+            # ==========================================
+            # pc = sample.get(f'point_cloud_{res}', torch.empty(0, 6, dtype=torch.float32)).to(device)
+            pc = sample.get(f'point_cloud_{res}', torch.empty(0, 15, dtype=torch.float32)).to(device)
+            # We expect all samples to have point clouds if res == active_voxel_res
+            if pc.numel() > 0:
+                all_point_clouds_list.append(pc)
+        collated[f'layout_{res}'] = layout
+        def cat_or_empty(lst, shape, dtype):
+            return torch.cat(lst, dim=0) if lst else torch.empty(shape, dtype=dtype, device=device)
+        collated[f'combined_voxels_{res}']         = cat_or_empty(all_combined_voxels_list,(0,4),torch.int32)
+        collated[f'combined_voxel_labels_{res}']   = cat_or_empty(all_combined_labels_list,(0,),torch.long)
+        collated[f'gt_vertex_voxels_{res}']        = cat_or_empty(all_vertex_voxels_only,(0,4),torch.int32)
+        collated[f'gt_edge_voxels_{res}']          = cat_or_empty(all_edge_voxels_only,(0,4),torch.int32)
+        collated[f'gt_edge_endpoints_{res}']       = cat_or_empty(all_edge_endpoints_only,(0,7),torch.float32)
+        collated[f'gt_combined_endpoints_{res}']   = cat_or_empty(all_combined_endpoints,(0,7),torch.float32)
+        collated[f'gt_combined_errors_{res}']      = cat_or_empty(all_combined_errors_list,(0,4),torch.float32)
+        collated[f'active_voxels_{res}'] = cat_or_empty(all_active_voxels_list, (0, 4), torch.int32)
+        if all_edge_indices_list:
+            collated[f'gt_vertex_edge_indices_{res}'] = torch.cat(all_edge_indices_list, dim=0)
+        else:
+            collated[f'gt_vertex_edge_indices_{res}'] = torch.empty((0, 2), dtype=torch.long, device=device)
+        if all_point_clouds_list:
+            collated[f'point_cloud_{res}'] = torch.stack(all_point_clouds_list, dim=0)
+        else:
+            # collated[f'point_cloud_{res}'] = torch.empty((0, 6), dtype=torch.float32, device=device)
+            collated[f'point_cloud_{res}'] = torch.empty((0, 15), dtype=torch.float32, device=device)
+    return collated

debug_viz/step_0_batch_0.ply ADDED Viewed

The diff for this file is too large to render. See raw diff

debug_viz/step_0_batch_1.ply ADDED Viewed

The diff for this file is too large to render. See raw diff

filter_active_voxels.py ADDED Viewed

	@@ -0,0 +1,106 @@

+import os
+import numpy as np
+from tqdm import tqdm
+from concurrent.futures import ProcessPoolExecutor, as_completed
+# === Configuration ===
+cache_dir = "/gemini/user/private/zhaotianhao/dataset_cache/MERGED_DATASET_count_200_2000_100000_128to1024_819200"
+# 1. Filter thresholds for the combined (edge) voxels at resolution `target_res_edge`
+target_res_edge = 512
+min_edge_voxels = 2000
+max_edge_voxels = 75000
+# 2. Filter thresholds for the active voxels at resolution `target_res_active`
+# Adjust the two bounds below to your needs.
+target_res_active = 128
+min_active_voxels = 2000   # lower bound (inclusive) on the number of active voxels
+max_active_voxels = 326780  # upper bound (inclusive) on the number of active voxels
+save_txt_path = f"/gemini/user/private/zhaotianhao/Triposf/MERGED_DATASET_filtered_{min_edge_voxels}-{max_edge_voxels}edge_{min_active_voxels}-{max_active_voxels}active.txt"
+# === 单文件统计函数 ===
+def check_voxel_counts(npz_path):
+    try:
+        # 打开 npz 文件
+        with np.load(npz_path) as data:
+            # 键名定义
+            key_edge = f"combined_voxels_{target_res_edge}"
+            key_active = f"active_voxels_{target_res_active}"
+            # 检查键是否存在
+            if key_edge not in data or key_active not in data:
+                return None
+            # 获取数量
+            count_edge = len(data[key_edge])
+            count_active = len(data[key_active])
+            # === 核心筛选逻辑 (同时满足两个条件) ===
+            is_edge_valid = min_edge_voxels <= count_edge <= max_edge_voxels
+            is_active_valid = min_active_voxels <= count_active <= max_active_voxels
+            if is_edge_valid and is_active_valid:
+                base_name = os.path.basename(npz_path)
+                # 处理文件名
+                if base_name.endswith("_precombined.npz"):
+                    original_name = base_name.replace("_precombined.npz", "")
+                else:
+                    original_name = os.path.splitext(base_name)[0]
+                return (original_name, count_edge, count_active)
+    except Exception:
+        return None
+    return None
+# === 获取所有 NPZ 文件 ===
+if not os.path.exists(cache_dir):
+    print(f"错误: 缓存目录不存在 {cache_dir}")
+    exit()
+npz_files = [os.path.join(cache_dir, f) for f in os.listdir(cache_dir) if f.endswith(".npz")]
+print(f"共发现 {len(npz_files)} 个缓存文件。开始并行过滤...")
+print(f"筛选条件:")
+print(f"  - Edge (512): {min_edge_voxels} ~ {max_edge_voxels}")
+print(f"  - Active (64): {min_active_voxels} ~ {max_active_voxels}")
+# === 并行过滤 ===
+filtered_files = []
+counts_edge = []
+counts_active = []
+with ProcessPoolExecutor(max_workers=os.cpu_count()) as executor:
+    futures = {executor.submit(check_voxel_counts, path): path for path in npz_files}
+    for future in tqdm(as_completed(futures), total=len(futures), desc="Filtering"):
+        result = future.result()
+        if result is not None:
+            fname, c_edge, c_active = result
+            filtered_files.append(fname)
+            counts_edge.append(c_edge)
+            counts_active.append(c_active)
+# === 保存结果 ===
+os.makedirs(os.path.dirname(save_txt_path), exist_ok=True)
+with open(save_txt_path, "w") as f:
+    for fname in filtered_files:
+        f.write(f"{fname}\n")
+# === 打印统计信息 ===
+print(f"\n✅ 筛选完成：")
+print(f"  符合条件的文件数: {len(filtered_files)} / {len(npz_files)} (保留率: {len(filtered_files)/len(npz_files)*100:.2f}%)")
+if counts_edge:
+    print(f"\n[统计 - Edge Voxels (512)]")
+    print(f"  最小值: {min(counts_edge)}")
+    print(f"  最大值: {max(counts_edge)}")
+    print(f"  平均值: {np.mean(counts_edge):.2f}")
+if counts_active:
+    print(f"\n[统计 - Active Voxels (64)]")
+    print(f"  最小值: {min(counts_active)}")
+    print(f"  最大值: {max(counts_active)}")
+    print(f"  平均值: {np.mean(counts_active):.2f}")
+print(f"\n  结果已保存到: {save_txt_path}")

generate_npz.py ADDED Viewed

	@@ -0,0 +1,118 @@

+import sys
+import os
+import yaml
+import torch
+import os
+from torch.utils.data import DataLoader
+from functools import partial
+# Assuming your custom modules are in the same directory or in the Python path
+# from dataset import VoxelVertexDataset_edge, collate_fn_edge
+from dataset_triposf import VoxelVertexDataset_edge, collate_fn_pointnet
+def inspect_batch(batch, batch_idx, device):
+    """
+    A detailed function to inspect and print information about a single batch.
+    """
+    print(f"\n{'='*20} Inspecting Batch {batch_idx} {'='*20}")
+    # if batch is None:
+    #     print("Batch is None. Skipping.")
+    #     return
+    # print("Batch contains the following keys:")
+    # for key in batch.keys():
+    #     print(f"  - {key}")
+    # print(f"{'='*58}")
+def main():
+    """
+    Build the dataset with hard-coded settings, wrap it in a DataLoader and
+    iterate over every batch, calling ``inspect_batch`` on each one.
+    NOTE(review): ``--num_batches`` is parsed and echoed in a log message, but
+    the loop below does not stop after that many batches.
+    """
+    import argparse
+    parser = argparse.ArgumentParser(description="Process and inspect data from the VoxelVertexDataset.")
+    # parser.add_argument('config_path', type=str, help='Path to the configuration YAML file.')
+    parser.add_argument('--num_batches', type=int, default=3, help='Number of batches to inspect.')
+    args = parser.parse_args()
+    # 1. Load Configuration (currently disabled; settings are hard-coded below)
+    # print(f"Loading configuration from: {args.config_path}")
+    # with open(args.config_path) as f:
+    #     cfg = yaml.safe_load(f)
+    # 2. Initialize Device
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    print(f"Using device: {device}")
+    # 3. Initialize Dataset
+    print("Initializing dataset...")
+    # Alternative dataset locations kept for reference:
+    # dataset = VoxelVertexDataset_edge(
+    #     root_dir='/HOME/paratera_xy/pxy1054/HDD_POOL/Triposf/final_data_decimate_2',
+    #     base_resolution=512,
+    #     min_resolution=64,
+    #     cache_dir='/HOME/paratera_xy/pxy1054/HDD_POOL/Triposf/dataset_cache/final_data_decimate_60w_2',
+    #     renders_dir=None,
+    # )
+    dataset = VoxelVertexDataset_edge(
+        root_dir='/root/mesh_split_200complex/mesh_split_200complex_train',
+        base_resolution=512,
+        min_resolution=64,
+        cache_dir='/root/Trisf/dataset_cache/objaverse_200_2000_filtered_final_8354files_512to512',
+        renders_dir=None,
+        filter_active_voxels=False,
+        cache_filter_path='',
+        active_voxel_res=512,
+        sample_type='dora',
+    )
+    # dataset = VoxelVertexDataset_edge(
+    #     root_dir='/HOME/paratera_xy/pxy1054/HDD_POOL/Triposf/meshgpt_data/train/03001627',
+    #     base_resolution=512,
+    #     min_resolution=64,
+    #     cache_dir='/HOME/paratera_xy/pxy1054/HDD_POOL/Triposf/dataset_cache/03001627',
+    #     renders_dir=None,
+    # )
+    # dataset = VoxelVertexDataset_edge(
+    #     root_dir='/HOME/paratera_xy/pxy1054/HDD_POOL/Triposf/meshgpt_data/train/03636649',
+    #     base_resolution=512,
+    #     min_resolution=64,
+    #     cache_dir='/HOME/paratera_xy/pxy1054/HDD_POOL/Triposf/dataset_cache/03636649',
+    #     renders_dir=None,
+    # )
+    # dataset = VoxelVertexDataset_edge(
+    #     root_dir='/HOME/paratera_xy/pxy1054/HDD_POOL/Triposf/meshgpt_data/train/04379243',
+    #     base_resolution=512,
+    #     min_resolution=64,
+    #     cache_dir='/HOME/paratera_xy/pxy1054/HDD_POOL/Triposf/dataset_cache/04379243',
+    #     renders_dir=None,
+    # )
+    print(f"Dataset initialized with {len(dataset)} samples.")
+    # 4. Initialize DataLoader
+    # We don't need a DistributedSampler here, just a regular DataLoader.
+    print("Initializing DataLoader...")
+    dataloader = DataLoader(
+        dataset,
+        batch_size=1,
+        shuffle=False,  # keep the dataset order (no shuffling)
+        collate_fn=partial(collate_fn_pointnet,),
+        num_workers=24,
+        pin_memory=True,
+    )
+    # 5. Data Processing Loop
+    # Every batch of the loader is visited; args.num_batches only appears in the message.
+    print(f"\nStarting data inspection loop for {args.num_batches} batches...")
+    for i, batch in enumerate(dataloader):
+        inspect_batch(batch, i, device)
+    print("\nData inspection complete.")
+if __name__ == '__main__':
+    main()

mesh_augment.py ADDED Viewed

	@@ -0,0 +1,79 @@

+import os
+import numpy as np
+import open3d as o3d
+import trimesh
+import random
+from tqdm import tqdm
+from pathlib import Path
+def augment_obj_file(input_path, output_path, n_augmentations=5):
+    """
+    Augment an OBJ file with random transformations
+    Args:
+        input_path: Path to input OBJ file
+        output_path: Directory to save augmented files
+        n_augmentations: Number of augmented copies to create
+    """
+    # Create output directory if it doesn't exist
+    os.makedirs(output_path, exist_ok=True)
+    # Load the original mesh
+    mesh = trimesh.load(input_path)
+    original_name = Path(input_path).stem
+    for i in range(n_augmentations):
+        # Create a copy of the original mesh
+        augmented_mesh = mesh.copy()
+        # Random rotation (0-360 degrees around each axis)
+        angle_x = np.random.uniform(0, 2*np.pi)
+        angle_y = np.random.uniform(0, 2*np.pi)
+        angle_z = np.random.uniform(0, 2*np.pi)
+        rotation_matrix = trimesh.transformations.euler_matrix(angle_x, angle_y, angle_z)
+        augmented_mesh.apply_transform(rotation_matrix)
+        # Random scaling (0.8-1.2 range)
+        scale_factor = np.random.uniform(0.8, 1.2, size=3)
+        scale_matrix = np.eye(4)
+        scale_matrix[:3, :3] *= scale_factor
+        augmented_mesh.apply_transform(scale_matrix)
+        # Random translation (-0.1 to 0.1 range in each dimension)
+        translation = np.random.uniform(-0.1, 0.1, size=3)
+        translation_matrix = np.eye(4)
+        translation_matrix[:3, 3] = translation
+        augmented_mesh.apply_transform(translation_matrix)
+        # Save the augmented mesh
+        output_file = os.path.join(output_path, f"{original_name}_aug_{i}.obj")
+        augmented_mesh.export(output_file)
+def augment_all_objs(source_dir, target_dir, n_augmentations=5):
+    """
+    Augment all OBJ files in a directory
+    Args:
+        source_dir: Directory containing original OBJ files
+        target_dir: Directory to save augmented files
+        n_augmentations: Number of augmented copies per file
+    """
+    # Get all OBJ files in source directory
+    obj_files = [f for f in os.listdir(source_dir) if f.endswith('.obj')]
+    print(f"Found {len(obj_files)} OBJ files to augment")
+    print(f"Will create {n_augmentations} augmented versions per file")
+    # Process each file
+    for obj_file in tqdm(obj_files, desc="Augmenting OBJ files"):
+        input_path = os.path.join(source_dir, obj_file)
+        augment_obj_file(input_path, target_dir, n_augmentations)
+    print(f"Finished! Augmented files saved to: {target_dir}")
+if __name__ == "__main__":
+    # Configuration (hard-coded; edit before running)
+    N_AUGMENTATIONS = 10  # Number of augmented copies written per input file
+    SOURCE_DIR = "/root/shapenet_data/train_mesh_data_under_25kb_1000/train"  # Directory with the original OBJ files
+    TARGET_DIR = f"/root/shapenet_data/train_mesh_data_under_25kb_1000/train_{N_AUGMENTATIONS}augment"  # Output directory; its name embeds the copy count
+    # Run augmentation over the whole source directory
+    augment_all_objs(SOURCE_DIR, TARGET_DIR, N_AUGMENTATIONS)

metric.py ADDED Viewed

	@@ -0,0 +1,300 @@

+import os
+import glob
+import torch
+import numpy as np
+import warnings
+import trimesh
+from scipy.stats import entropy
+from sklearn.neighbors import NearestNeighbors
+from numpy.linalg import norm
+from tqdm.auto import tqdm
+# ==============================================================================
+# User Configuration
+# ==============================================================================
+# --- Paths ---
+# IMPORTANT:
+# For real metrics the two paths must point to different folders.
+# The commented-out pair below uses the same folder twice, which is handy as a
+# sanity check. When both paths are identical:
+# MMD->0, COV->1.0, 1-NNA->0.5, JSD->0
+# while CD and HD become small values that reflect the difference between two
+# independent samplings of the same mesh.
+# GENERATED_MESH_DIR = "/root/mesh_split_200complex/mesh_split_200complex_test"  # folder holding the generated .obj files
+# GT_MESH_DIR = "/root/mesh_split_200complex/mesh_split_200complex_test"                # folder holding the ground-truth .obj files
+GENERATED_MESH_DIR = "/root/Trisf/experiments_edge/train_set/1e-2kl_base/epoch_20_test_set_obj_0gs"  # folder holding the generated .obj files
+GT_MESH_DIR = "/root/Trisf/abalation_post_processing/gt_mesh"                # folder holding the ground-truth .obj files
+# --- Sampling and computation parameters ---
+NUM_POINTS_PER_MESH = 2048  # number of points sampled from the surface of each mesh
+BATCH_SIZE = 32             # batch size used when computing metrics; tune to the available GPU memory
+JSD_RESOLUTION = 28         # resolution of the voxel grid used for JSD
+# ==============================================================================
+# 核心功能函数: Mesh处理
+# ==============================================================================
+def process_meshes_in_folder(folder_path, num_points):
+    """
+    加载文件夹中所有的 .obj 文件, 将它们采样成点云, 并进行归一化。
+    """
+    # 按文件名排序以确保一一对应
+    mesh_files = sorted(glob.glob(os.path.join(folder_path, '*.obj')))
+    if not mesh_files:
+        raise FileNotFoundError(f"在文件夹 '{folder_path}' 中没有找到任何 .obj 文件。")
+    all_point_clouds = []
+    print(f"正在从 '{folder_path}' 处理 {len(mesh_files)} 个mesh...")
+    for mesh_path in tqdm(mesh_files, desc=f'处理 {os.path.basename(folder_path)}'):
+        try:
+            mesh = trimesh.load(mesh_path, process=False)
+            # 归一化: 移动到原点并缩放到单位球体内
+            center = mesh.bounds.mean(axis=0)
+            mesh.apply_translation(-center)
+            max_dist = np.max(np.linalg.norm(mesh.vertices, axis=1))
+            if max_dist > 0:
+                mesh.apply_scale(1.0 / max_dist)
+            points, _ = trimesh.sample.sample_surface(mesh, num_points)
+            if points.shape[0] != num_points:
+                # print(f"警告: {mesh_path} 采样点数 {points.shape[0]} != {num_points}, 进行重采样。")
+                indices = np.random.choice(points.shape[0], num_points, replace=True)
+                points = points[indices]
+            all_point_clouds.append(points)
+        except Exception as e:
+            print(f"错误：加载或处理文件 {mesh_path} 失败: {e}")
+    return np.array(all_point_clouds)
+# ==============================================================================
+# 评估指标代码 (来自 PointFlow 及新增)
+# ==============================================================================
+_EMD_NOT_IMPL_WARNED = False
+def emd_approx(sample, ref):
+    global _EMD_NOT_IMPL_WARNED
+    emd = torch.zeros([sample.size(0)]).to(sample)
+    if not _EMD_NOT_IMPL_WARNED:
+        _EMD_NOT_IMPL_WARNED = True
+        print('\n\n[WARNING] EMD is not implemented. Setting to zero.')
+    return emd
+def distChamfer(a, b):
+    x, y = a, b
+    bs, num_points, points_dim = x.size()
+    xx = torch.bmm(x, x.transpose(2, 1))
+    yy = torch.bmm(y, y.transpose(2, 1))
+    zz = torch.bmm(x, y.transpose(2, 1))
+    diag_ind = torch.arange(0, num_points, device=a.device).long()
+    rx = xx[:, diag_ind, diag_ind].unsqueeze(1).expand_as(xx)
+    ry = yy[:, diag_ind, diag_ind].unsqueeze(1).expand_as(yy)
+    P = (rx.transpose(2, 1) + ry - 2 * zz)
+    # P is batch_size x n_points x n_points matrix of squared distances
+    return P.min(1)[0], P.min(2)[0]
+def compute_cd_hd(sample_pcs, ref_pcs, batch_size):
+    """
+    计算平均成对的Chamfer Distance (CD) 和 Hausdorff Distance (HD)。
+    """
+    print("\n--- 开始计算 平均Chamfer和Hausdorff距离 ---")
+    N_sample = sample_pcs.shape[0]
+    N_ref = ref_pcs.shape[0]
+    assert N_sample == N_ref, f"用于成对度量计算的集合大小必须相等, 但得到 {N_sample} 和 {N_ref}"
+    cd_all = []
+    hd_all = []
+    iterator = range(0, N_sample, batch_size)
+    for b_start in tqdm(iterator, desc='计算 CD/HD'):
+        b_end = min(N_sample, b_start + batch_size)
+        sample_batch = sample_pcs[b_start:b_end]
+        ref_batch = ref_pcs[b_start:b_end]
+        # distChamfer返回的是平方距离
+        dist1_sq, dist2_sq = distChamfer(sample_batch, ref_batch)
+        # 计算 Chamfer Distance
+        cd_batch = dist1_sq.mean(dim=1) + dist2_sq.mean(dim=1)
+        cd_all.append(cd_batch)
+        # 计算 Hausdorff Distance
+        # HD = max(max(min_dist_1), max(min_dist_2))
+        # 我们需要对平方距离开方来得到真实距离
+        hd_batch = torch.max(dist1_sq.max(dim=1)[0], dist2_sq.max(dim=1)[0]).sqrt()
+        hd_all.append(hd_batch)
+    cd_all = torch.cat(cd_all)
+    hd_all = torch.cat(hd_all)
+    results = {
+        'Chamfer-L2': cd_all.mean(),
+        'Hausdorff': hd_all.mean(),
+    }
+    return results
+def _pairwise_EMD_CD_(sample_pcs, ref_pcs, batch_size, verbose=True):
+    N_sample = sample_pcs.shape[0]
+    N_ref = ref_pcs.shape[0]
+    all_cd = []
+    iterator = range(N_sample)
+    if verbose:
+        iterator = tqdm(iterator, desc='计算点云间距离')
+    for i in iterator:
+        sample_batch = sample_pcs[i]
+        cd_lst = []
+        sub_iterator = range(0, N_ref, batch_size)
+        for b_start in sub_iterator:
+            b_end = min(N_ref, b_start + batch_size)
+            ref_batch = ref_pcs[b_start:b_end]
+            batch_size_ref = ref_batch.size(0)
+            sample_batch_exp = sample_batch.view(1, -1, 3).expand(batch_size_ref, -1, -1).contiguous()
+            dl, dr = distChamfer(sample_batch_exp, ref_batch)
+            cd_lst.append((dl.mean(dim=1) + dr.mean(dim=1)).view(1, -1))
+        cd_lst = torch.cat(cd_lst, dim=1)
+        all_cd.append(cd_lst)
+    all_cd = torch.cat(all_cd, dim=0)
+    # EMD is not implemented, so we return a dummy tensor for it
+    all_emd = torch.zeros_like(all_cd)
+    return all_cd, all_emd
+def knn(Mxx, Mxy, Myy, k, sqrt=False):
+    n0, n1 = Mxx.size(0), Myy.size(0)
+    device = Mxx.device
+    ones_tensor = torch.ones(n0, device=device)
+    zeros_tensor = torch.zeros(n1, device=device)
+    label = torch.cat((ones_tensor, zeros_tensor))
+    M = torch.cat([torch.cat((Mxx, Mxy), 1), torch.cat((Mxy.t(), Myy), 1)], 0)
+    if sqrt: M = M.abs().sqrt()
+    diag_inf = torch.diag(torch.full((n0 + n1,), float('inf'), device=device))
+    val, idx = (M + diag_inf).topk(k, 0, False)
+    count = torch.zeros(n0 + n1, device=device)
+    for i in range(k):
+        count.add_(label.index_select(0, idx[i]))
+    threshold = torch.full((n0 + n1,), float(k) / 2, device=device)
+    pred = (count >= threshold).float()
+    return {'acc': (label == pred).float().mean()}
+def lgan_mmd_cov(all_dist):
+    N_sample, N_ref = all_dist.shape
+    min_val, min_idx = all_dist.min(dim=1) # For each sample, find closest ref
+    mmd_smp = min_val.mean() # MMD-smp
+    min_val_ref, _ = all_dist.min(dim=0) # For each ref, find closest sample
+    mmd = min_val_ref.mean() # MMD-ref
+    cov = min_idx.unique().numel() / float(N_ref)
+    cov = torch.tensor(cov, device=all_dist.device)
+    return {'lgan_mmd': mmd, 'lgan_cov': cov}
+def compute_mmd_cov_1nna(sample_pcs, ref_pcs, batch_size):
+    results = {}
+    print("\n--- 开始计算 MMD-CD, COV-CD, 1-NNA-CD ---")
+    M_rs_cd, _ = _pairwise_EMD_CD_(ref_pcs, sample_pcs, batch_size) # ref vs sample
+    res_cd = lgan_mmd_cov(M_rs_cd.t()) # Transpose to get sample vs ref
+    results.update({f"{k}-CD": v for k, v in res_cd.items()})
+    M_rr_cd, _ = _pairwise_EMD_CD_(ref_pcs, ref_pcs, batch_size)
+    M_ss_cd, _ = _pairwise_EMD_CD_(sample_pcs, sample_pcs, batch_size)
+    one_nn_cd_res = knn(M_rr_cd, M_rs_cd, M_ss_cd, 1)
+    results.update({"1-NNA-CD": one_nn_cd_res['acc']})
+    return results
+def unit_cube_grid_point_cloud(resolution, clip_sphere=False):
+    grid = np.linspace(-0.5, 0.5, resolution)
+    x, y, z = np.meshgrid(grid, grid, grid, indexing='ij')
+    grid = np.stack([x, y, z], axis=-1).reshape(-1, 3)
+    if clip_sphere:
+        grid = grid[norm(grid, axis=1) <= 0.5]
+    return grid
+def entropy_of_occupancy_grid(pclouds, grid_resolution):
+    grid_coords = unit_cube_grid_point_cloud(grid_resolution, True)
+    grid_counters = np.zeros(len(grid_coords))
+    nn = NearestNeighbors(n_neighbors=1).fit(grid_coords)
+    for pc in tqdm(pclouds, desc='计算占据网格'):
+        _, indices = nn.kneighbors(pc)
+        indices = np.unique(indices.squeeze())
+        grid_counters[indices] += 1
+    return grid_counters
+def jensen_shannon_divergence(P, Q):
+    P_ = P / (P.sum() + 1e-9)
+    Q_ = Q / (Q.sum() + 1e-9)
+    M = 0.5 * (P_ + Q_)
+    return 0.5 * (entropy(P_, M, base=2) + entropy(Q_, M, base=2))
+def compute_jsd(sample_pcs, ref_pcs, resolution):
+    print("\n--- 开始计算 JSD ---")
+    sample_grid_dist = entropy_of_occupancy_grid(sample_pcs, resolution)
+    ref_grid_dist = entropy_of_occupancy_grid(ref_pcs, resolution)
+    jsd = jensen_shannon_divergence(sample_grid_dist, ref_grid_dist)
+    return jsd
+# ==============================================================================
+# Main Execution
+# ==============================================================================
+if __name__ == '__main__':
+    # 1. Load the meshes and convert them to point clouds (NumPy arrays)
+    # NOTE(review): clouds are paired by sorted file name later on; a file that
+    # fails to load is skipped by process_meshes_in_folder, which would shift
+    # the pairing -- confirm both folders load completely.
+    sample_pcs_np = process_meshes_in_folder(GENERATED_MESH_DIR, NUM_POINTS_PER_MESH)
+    ref_pcs_np = process_meshes_in_folder(GT_MESH_DIR, NUM_POINTS_PER_MESH)
+    print(f"\n加载完成: {sample_pcs_np.shape[0]} 个生成点云, {ref_pcs_np.shape[0]} 个真实点云。")
+    print(f"每个点云包含 {sample_pcs_np.shape[1]} 个点。")
+    # 2. Pick the device and convert the data to PyTorch tensors
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    print(f"使用设备: {device}")
+    sample_pcs_torch = torch.from_numpy(sample_pcs_np).float().to(device)
+    ref_pcs_torch = torch.from_numpy(ref_pcs_np).float().to(device)
+    # 3. Distribution metrics: MMD, COV, 1-NNA (PyTorch)
+    metrics_results = compute_mmd_cov_1nna(sample_pcs_torch, ref_pcs_torch, BATCH_SIZE)
+    # 4. Pairwise geometric metrics: CD, HD (PyTorch)
+    cd_hd_results = compute_cd_hd(sample_pcs_torch, ref_pcs_torch, BATCH_SIZE)
+    metrics_results.update(cd_hd_results) # merge into the same result dict
+    # 5. JSD (NumPy)
+    jsd_result = compute_jsd(sample_pcs_np, ref_pcs_np, JSD_RESOLUTION)
+    # 6. Print the final report
+    print("\n==================================================")
+    print("                  评估结果")
+    print("==================================================")
+    print("\n--- 分布质量与多样性 (Distribution Metrics) ---")
+    # MMD: lower is better (quality)
+    print(f"{'lgan_mmd-CD':<12s}: {metrics_results['lgan_mmd-CD'].item():.6f} (↓ Lower is better)")
+    # COV: higher is better (diversity)
+    print(f"{'lgan_cov-CD':<12s}: {metrics_results['lgan_cov-CD'].item():.6f} (↑ Higher is better)")
+    # 1-NNA: the closer to 0.5 the better (realism)
+    print(f"{'1-NNA-CD':<12s}: {metrics_results['1-NNA-CD'].item():.6f} (→ Closer to 0.5 is better)")
+    # JSD: lower is better (distribution similarity)
+    print(f"{'JSD':<12s}: {jsd_result:.6f} (↓ Lower is better)")
+    print("\n--- 平均几何保真度 (Average Geometric Fidelity) ---")
+    # CD: lower is better
+    print(f"{'Chamfer-L2':<12s}: {metrics_results['Chamfer-L2'].item():.6f} (↓ Lower is better)")
+    # HD: lower is better
+    print(f"{'Hausdorff':<12s}: {metrics_results['Hausdorff'].item():.6f} (↓ Lower is better)")
+    print("==================================================")

metric_cd.py ADDED Viewed

	@@ -0,0 +1,190 @@

+import os
+import argparse
+import numpy as np
+import torch
+import trimesh
+from tqdm import tqdm
+# =====================================================
+# 🔹 Mesh归一化函数
+# =====================================================
+def normalize_to_unit_sphere(mesh: trimesh.Trimesh) -> trimesh.Trimesh:
+    """将mesh平移到原点并缩放到单位球内"""
+    vertices = mesh.vertices
+    centroid = vertices.mean(axis=0)
+    vertices = vertices - centroid
+    scale = np.max(np.linalg.norm(vertices, axis=1))
+    vertices = vertices / scale
+    mesh.vertices = vertices
+    return mesh
+def normalize_to_unit_cube(mesh: trimesh.Trimesh) -> trimesh.Trimesh:
+    """将mesh平移并缩放到[-1,1]^3单位立方体内"""
+    bbox_min, bbox_max = mesh.bounds
+    center = (bbox_min + bbox_max) / 2
+    scale = (bbox_max - bbox_min).max() / 2
+    mesh.vertices = (mesh.vertices - center) / scale
+    return mesh
+# =====================================================
+# 🔹 点云采样函数 + 返回面片数
+# =====================================================
+def sample_points_from_mesh(mesh_path: str, num_points: int, normalize: str = "none"):
+    """
+    从mesh文件采样点云，并可选归一化。
+    返回: (points: Tensor, face_count: int)
+    """
+    try:
+        mesh = trimesh.load(mesh_path, force='mesh', process=False)
+        if normalize == "sphere":
+            mesh = normalize_to_unit_sphere(mesh)
+        elif normalize == "cube":
+            mesh = normalize_to_unit_cube(mesh)
+        points, _ = trimesh.sample.sample_surface(mesh, num_points)
+        face_count = len(mesh.faces)
+        return torch.from_numpy(points).float(), face_count
+    except Exception as e:
+        print(f"[-] 警告：加载或采样文件失败 {mesh_path}。错误: {e}")
+        return None, 0
+# =====================================================
+# 🔹 Chamfer Distance 计算函数
+# =====================================================
+def find_minimum_cd_batched(gen_pc: torch.Tensor, gt_pcs_batch: torch.Tensor):
+    """计算生成点云到一批GT点云的最小CD及对应索引"""
+    gen_pc_batch = gen_pc.unsqueeze(0).expand(gt_pcs_batch.size(0), -1, -1)
+    dist_matrix = torch.cdist(gen_pc_batch, gt_pcs_batch)
+    min_dist_gen_to_gt = dist_matrix.min(2).values.mean(1)
+    min_dist_gt_to_gen = dist_matrix.min(1).values.mean(1)
+    cd_scores_for_one_gen = min_dist_gen_to_gt + min_dist_gt_to_gen
+    min_cd, min_idx = cd_scores_for_one_gen.min(0)
+    return min_cd.item(), min_idx.item()
+# =====================================================
+# 🔹 Main pipeline
+# =====================================================
+def main(args):
+    """Match every generated mesh to its closest ground-truth mesh and report the averages.
+    Reads ``args.gt_dir``, ``args.generated_dir``, ``args.num_points``,
+    ``args.batch_size``, ``args.normalize`` and ``args.quiet``. For each
+    generated mesh the ground-truth cloud with the smallest Chamfer distance
+    is selected; the mean of those minima and the face-count statistics of
+    the matched pairs are printed. Returns ``None``.
+    """
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    print(f"[*] 使用设备: {device}")
+    print(f"[*] 归一化模式: {args.normalize}")
+    # --- Step 1: load and sample all ground-truth meshes ---
+    print("[*] 正在预加载并采样所有GT网格...")
+    gt_files = sorted([f for f in os.listdir(args.gt_dir) if f.endswith(('.obj', '.ply', '.off'))])
+    if not gt_files:
+        print(f"[-] 错误: GT目录中未找到mesh文件: {args.gt_dir}")
+        return
+    # The two lists stay index-aligned: only successfully sampled files are appended.
+    gt_point_clouds, gt_faces_counts = [], []
+    for gt_filename in tqdm(gt_files, desc="预处理GT网格"):
+        gt_filepath = os.path.join(args.gt_dir, gt_filename)
+        pc, fnum = sample_points_from_mesh(gt_filepath, args.num_points, args.normalize)
+        if pc is not None:
+            gt_point_clouds.append(pc.to(device))
+            gt_faces_counts.append(fnum)
+    if not gt_point_clouds:
+        print("[-] 错误: 无法从任何GT文件采样点云。")
+        return
+    print(f"[*] 成功加载 {len(gt_point_clouds)} 个GT点云。")
+    # --- Step 2: iterate over the generated meshes ---
+    gen_files = sorted([f for f in os.listdir(args.generated_dir) if f.endswith(('.obj', '.ply', '.off'))])
+    if not gen_files:
+        print(f"[-] 错误: 生成目录中未找到mesh文件: {args.generated_dir}")
+        return
+    all_min_cd_scores = []
+    face_ratios = []
+    pred_faces_all = []
+    gt_faces_matched = []
+    for gen_filename in tqdm(gen_files, desc="评估生成的网格"):
+        gen_filepath = os.path.join(args.generated_dir, gen_filename)
+        gen_pc, gen_face_count = sample_points_from_mesh(gen_filepath, args.num_points, args.normalize)
+        if gen_pc is None:
+            # Loading or sampling failed; the file is left out of the statistics.
+            continue
+        gen_pc = gen_pc.to(device)
+        batch_size = args.batch_size
+        min_cd_for_this_gen = float('inf')
+        matched_gt_idx = -1
+        # Scan the ground-truth clouds in chunks and keep the best match overall.
+        for i in range(0, len(gt_point_clouds), batch_size):
+            gt_pcs_batch = torch.stack(gt_point_clouds[i:i + batch_size])
+            min_cd_in_batch, idx_in_batch = find_minimum_cd_batched(gen_pc, gt_pcs_batch)
+            if min_cd_in_batch < min_cd_for_this_gen:
+                min_cd_for_this_gen = min_cd_in_batch
+                # Convert the in-chunk index to an index into the full list.
+                matched_gt_idx = i + idx_in_batch
+        all_min_cd_scores.append(min_cd_for_this_gen)
+        if matched_gt_idx >= 0:
+            gt_face_count = gt_faces_counts[matched_gt_idx]
+            face_ratio = gen_face_count / gt_face_count if gt_face_count > 0 else 0
+            face_ratios.append(face_ratio)
+            pred_faces_all.append(gen_face_count)
+            gt_faces_matched.append(gt_face_count)
+            if not args.quiet:
+                print(f"  -> {gen_filename}: 最小CD={min_cd_for_this_gen:.6f}, Pred面数={gen_face_count}, GT面数={gt_face_count}, 比值={face_ratio:.3f}")
+    # --- Step 3: summary ---
+    if not all_min_cd_scores:
+        print("\n[-] 评估结束，但没有成功处理任何网格。")
+    else:
+        mean_min_cd = np.mean(all_min_cd_scores)
+        mean_face_ratio = np.mean(face_ratios) if face_ratios else 0
+        mean_pred_faces = np.mean(pred_faces_all) if pred_faces_all else 0
+        mean_gt_faces = np.mean(gt_faces_matched) if gt_faces_matched else 0
+        print("\n" + "="*70)
+        print(f"[*] 评估完成 (基于最小CD匹配)")
+        print(f"[*] 共评估 {len(all_min_cd_scores)} 个生成网格")
+        print(f"[*] 平均最小倒角距离 (Mean Min CD): {mean_min_cd:.6f}")
+        print(f"[*] 平均Pred面片数: {mean_pred_faces:.1f}")
+        print(f"[*] 平均GT面片数:   {mean_gt_faces:.1f}")
+        print(f"[*] 平均面片比 (Pred/GT): {mean_face_ratio:.3f}")
+        print("="*70)
+# =====================================================
+# 🔹 Command-line interface
+# =====================================================
+if __name__ == "__main__":
+    # Parse the options read by main(): the two mesh folders are required,
+    # the sampling / batching / normalization options have defaults.
+    parser = argparse.ArgumentParser(description="评估生成mesh与GT集合的最小Chamfer Distance及面片数比")
+    parser.add_argument("--generated_dir", type=str, required=True, help="生成的mesh文件夹路径")
+    parser.add_argument("--gt_dir", type=str, required=True, help="GT网格文件夹路径")
+    parser.add_argument("--num_points", type=int, default=10000, help="每个mesh采样点数")
+    parser.add_argument("--batch_size", type=int, default=16, help="与多少个GT点云进行批处理比较")
+    parser.add_argument("--normalize", type=str, default="none", choices=["none", "sphere", "cube"], help="归一化模式: none | sphere | cube")
+    parser.add_argument("--quiet", action="store_true", help="静默模式，只输出最终平均CD")
+    args = parser.parse_args()
+    main(args)
+'''
+# 不归一化
+python metric_cd.py \
+  --generated_dir /root/Trisf/experiments_edge/train_set/1e-2kl_base/epoch_20_test_set_obj_0gs \
+  --gt_dir /root/Trisf/abalation_post_processing/gt_mesh \
+  --num_points 4096 \
+  --normalize none
+# 归一化到单位球
+python metric_cd.py \
+  --generated_dir /root/Trisf/experiments_edge/train_set/1e-2kl_base/epoch_20_test_set_obj_0gs/0.8_1.5 \
+  --gt_dir /root/Trisf/abalation_post_processing/gt_mesh \
+  --num_points 4096 \
+  --normalize sphere
+# 归一化到单位立方体
+python metric_cd.py \
+  --generated_dir /root/Trisf/experiments_edge/train_set/1e-2kl_base/epoch_20_test_set_obj_0gs \
+  --gt_dir /root/Trisf/abalation_post_processing/gt_mesh \
+  --num_points 4096 \
+  --normalize cube
+'''

query_point.py ADDED Viewed

	@@ -0,0 +1,259 @@

+import torch
+import torch.nn as nn
+from torch import einsum
+import torch.nn.functional as F
+from functools import partial
+from timm.models.layers import DropPath
+from einops import rearrange, repeat
+# ---- PE: NeRF-style Position Encoding ----
+class Embedder:
+    def __init__(self, **kwargs):
+        self.kwargs = kwargs
+        self.create_embedding_fn()
+    def create_embedding_fn(self):
+        embed_fns = []
+        d = self.kwargs['input_dims']
+        out_dim = 0
+        if self.kwargs['include_input']:
+            embed_fns.append(self.identity_fn)
+            out_dim += d
+        max_freq = self.kwargs['max_freq_log2']
+        N_freqs = self.kwargs['num_freqs']
+        if self.kwargs['log_sampling']:
+            freq_bands = 2.**torch.linspace(0., max_freq, steps=N_freqs)
+        else:
+            freq_bands = torch.linspace(2.**0., 2.**max_freq, steps=N_freqs)
+        for freq in freq_bands:
+            for p_fn in self.kwargs['periodic_fns']:
+                embed_fns.append(partial(self.periodic_fn, p_fn=p_fn, freq=freq))
+                out_dim += d
+        self.embed_fns = embed_fns
+        self.out_dim = out_dim
+    def identity_fn(self, x):
+        return x
+    def periodic_fn(self, x, p_fn, freq):
+        return p_fn(x * freq)
+    def embed(self, inputs):
+        return torch.cat([fn(inputs) for fn in self.embed_fns], -1)
+def get_embedder(multires, i=0):
+    if i == -1:
+        return nn.Identity(), 1
+    embed_kwargs = {
+                'include_input': True,
+                'input_dims': 1,
+                'max_freq_log2': multires-1,
+                'num_freqs': multires,
+                'log_sampling': True,
+                'periodic_fns': [torch.sin, torch.cos],
+    }
+    embedder_obj = Embedder(**embed_kwargs)
+    embed = embedder_obj.embed
+    return embed, embedder_obj.out_dim
+class PE_NeRF(nn.Module):
+    def __init__(self, out_channels=512, multires=10):
+        super().__init__()
+        self.multires = multires
+        self.embed_fn, embed_dim_per_dim = get_embedder(multires)  # per-dim embed
+        self.embed_dim = embed_dim_per_dim * 3  # since 3D: x, y, z
+        self.coor_embed = nn.Sequential(
+            nn.Linear(self.embed_dim, 256),
+            nn.GELU(),
+            nn.Linear(256, out_channels)
+        )
+    def forward(self, vertices: torch.Tensor) -> torch.Tensor:
+        """
+        Args:
+            vertices: [B, 3] or [N, 3], coordinates in [-0.5, 0.5]
+        Returns:
+            encoded: [B, out_channels * 3]
+        """
+        x_embed = self.embed_fn(vertices[..., 0:1])  # [N, D]
+        y_embed = self.embed_fn(vertices[..., 1:2])
+        z_embed = self.embed_fn(vertices[..., 2:3])
+        pos_enc = torch.cat([x_embed, y_embed, z_embed], dim=-1)  # [N, D * 3]
+        return self.coor_embed(pos_enc)
+def exists(val):
+    return val is not None
+def default(val, d):
+    return val if exists(val) else d
+# ---- Attention & FF blocks ----
+class GEGLU(nn.Module):
+    def forward(self, x):
+        x, gate = x.chunk(2, dim=-1)
+        return x * F.gelu(gate)
+class FeedForward(nn.Module):
+    def __init__(self, dim, mult=4):
+        super().__init__()
+        self.net = nn.Sequential(
+            nn.Linear(dim, dim * mult * 2),
+            GEGLU(),
+            nn.Linear(dim * mult, dim)
+        )
+    def forward(self, x):
+        return self.net(x)
+class PreNorm(nn.Module):
+    def __init__(self, dim, fn, context_dim = None):
+        super().__init__()
+        self.fn = fn
+        self.norm = nn.LayerNorm(dim)
+        self.norm_context = nn.LayerNorm(context_dim) if exists(context_dim) else None
+    def forward(self, x, **kwargs):
+        x = self.norm(x)
+        if exists(self.norm_context):
+            context = kwargs['context']
+            normed_context = self.norm_context(context)
+            kwargs.update(context = normed_context)
+        return self.fn(x, **kwargs)
+class Attention(nn.Module):
+    def __init__(self, query_dim, context_dim = None, heads = 8, dim_head = 64, drop_path_rate = 0.0):
+        super().__init__()
+        inner_dim = dim_head * heads
+        context_dim = default(context_dim, query_dim)
+        self.scale = dim_head ** -0.5
+        self.heads = heads
+        self.to_q = nn.Linear(query_dim, inner_dim, bias = False)
+        self.to_kv = nn.Linear(context_dim, inner_dim * 2, bias = False)
+        self.to_out = nn.Linear(inner_dim, query_dim)
+        self.drop_path = DropPath(drop_path_rate) if drop_path_rate > 0. else nn.Identity()
+    def forward(self, x, context = None, mask = None):
+        h = self.heads
+        q = self.to_q(x)
+        context = default(context, x)
+        k, v = self.to_kv(context).chunk(2, dim = -1)
+        q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> (b h) n d', h = h), (q, k, v))
+        sim = einsum('b i d, b j d -> b i j', q, k) * self.scale
+        if exists(mask):
+            mask = rearrange(mask, 'b ... -> b (...)')
+            max_neg_value = -torch.finfo(sim.dtype).max
+            mask = repeat(mask, 'b j -> (b h) () j', h = h)
+            sim.masked_fill_(~mask, max_neg_value)
+        # attention, what we cannot get enough of
+        attn = sim.softmax(dim = -1)
+        out = einsum('b i j, b j d -> b i d', attn, v)
+        out = rearrange(out, '(b h) n d -> b n (h d)', h = h)
+        return self.drop_path(self.to_out(out))
+class QueryPointDecoder(nn.Module):
+    def __init__(self, query_dim=1536, context_dim=512, output_dim=1, depth=8,
+                 using_nerf=True, quantize_bits=10, dim=512, heads=8, multires=10):
+        super().__init__()
+        self.using_nerf = using_nerf
+        self.depth = depth
+        if using_nerf:
+            self.pe = PE_NeRF(out_channels=query_dim, multires=multires)
+        else:
+            self.embedding_x = nn.Embedding(2**quantize_bits, query_dim // 3)
+            self.embedding_y = nn.Embedding(2**quantize_bits, query_dim // 3)
+            self.embedding_z = nn.Embedding(2**quantize_bits, query_dim // 3)
+            self.coord_proj = nn.Sequential(
+                nn.Linear(query_dim, query_dim * 4),
+                nn.GELU(),
+                nn.Linear(query_dim * 4, query_dim)
+            )
+        # self.context_proj = nn.Linear(context_dim, query_dim)
+        self.context_proj = nn.Linear(context_dim, dim)
+        self.pe_ctx = PE_NeRF(out_channels=dim, multires=multires)
+        self.context_self_attn_layers = nn.ModuleList([
+            nn.ModuleList([
+                PreNorm(dim, Attention(dim, dim_head=64, heads=heads)),
+                PreNorm(dim, FeedForward(dim))
+            ]) for _ in range(depth)
+        ])
+        self.cross_attn = PreNorm(dim,
+                                Attention(dim, dim,
+                                        dim_head=dim, heads=1))
+        self.cross_ff = PreNorm(dim, FeedForward(dim))
+        self.to_outputs = nn.Linear(dim, output_dim)
+    def forward(self, query_points, context_feats, context_mask=None, voxels_coords=None,):
+        B, N, _ = query_points.shape
+        if self.using_nerf:
+            # print('query_points.min()', query_points.min())
+            # print('query_points.max()', query_points.max())
+            x = self.pe(query_points.view(-1, 3)).view(B, N, -1)
+        else:
+            embeddings = torch.cat([
+                self.embedding_x(query_points[..., 0]),
+                self.embedding_y(query_points[..., 1]),
+                self.embedding_z(query_points[..., 2]),
+            ], dim=-1)
+            x = self.coord_proj(embeddings)
+        context = self.context_proj(context_feats)
+        if voxels_coords is not None:
+            M = voxels_coords.shape[1]
+            normalized_coords = 2.0 * (voxels_coords.float() / 1024.) - 1.0
+            context += self.pe_ctx(normalized_coords.view(-1, 3)).view(B, M, -1)
+        attn_mask = context_mask[:, None, None, :] if context_mask is not None else None
+        for self_attn, ff in self.context_self_attn_layers:
+            context = self_attn(context, mask=attn_mask) + context
+            context = ff(context) + context
+        latents = self.cross_attn(x, context=context, mask=attn_mask)
+        latents = self.cross_ff(x) + latents
+        return self.to_outputs(latents).squeeze(-1)
+if __name__ == '__main__':
+    torch.manual_seed(42)
+    model = QueryPointDecoder().cuda()
+    model.eval()
+    B, N, M = 2, 64, 20
+    query_pts = torch.rand(B, N, 3).cuda() - 0.5  # [-0.5, 0.5]
+    context_feats = torch.randn(B, M, 512).cuda()
+    with torch.no_grad():
+        logits = model(query_pts, context_feats)
+        print("Logits shape:", logits.shape)  # [B, N, 1]

test_slat_flow_128to1024_pointnet.py ADDED Viewed

	@@ -0,0 +1,403 @@

+import os
+import numpy as np
+import torch
+import torch.nn as nn
+import yaml
+import time
+from datetime import datetime
+from torch.utils.data import DataLoader
+from functools import partial
+import torch.nn.functional as F
+from torch.amp import GradScaler, autocast
+from typing import *
+from transformers import CLIPTextModel, AutoTokenizer, CLIPTextConfig, Dinov2Model, AutoImageProcessor, Dinov2Config
+import torch
+import re
+from utils import load_pretrained_woself
+from dataset_triposf import VoxelVertexDataset_edge, collate_fn_pointnet
+from triposf.models.triposf_vae.VoxelFeatureVAE_edge_woself_128to1024_decoder_head import VoxelVAE
+from vertex_encoder import VoxelFeatureEncoder_active_pointnet
+from trellis.models.structured_latent_flow import SLatFlowModel
+from trellis.trainers.flow_matching.sparse_flow_matching_alone import SparseFlowMatchingTrainer
+from trellis.pipelines.samplers import FlowEulerSampler
+from safetensors.torch import load_file
+import open3d as o3d
+from PIL import Image
+from triposf.modules.sparse.basic import SparseTensor
+from trellis.modules.sparse.basic import SparseTensor as SparseTensor_trellis
+from triposf.modules.utils import DiagonalGaussianDistribution
+from sklearn.decomposition import PCA
+import trimesh
+import torchvision.transforms as transforms
+# --- Helper Functions ---
+def save_colored_ply(points, colors, filename):
+    if len(points) == 0:
+        print(f"[Warning] No points to save for {filename}")
+        return
+    # Ensure colors are uint8
+    if colors.max() <= 1.0:
+        colors = (colors * 255).astype(np.uint8)
+    colors = colors.astype(np.uint8)
+    # Add Alpha if missing
+    if colors.shape[1] == 3:
+        colors = np.hstack([colors, np.full((len(colors), 1), 255, dtype=np.uint8)])
+    cloud = trimesh.PointCloud(points, colors=colors)
+    cloud.export(filename)
+    print(f"Saved colored point cloud to {filename}")
+def normalize_to_rgb(features_3d):
+    min_vals = features_3d.min(axis=0)
+    max_vals = features_3d.max(axis=0)
+    range_vals = max_vals - min_vals
+    range_vals[range_vals == 0] = 1
+    normalized = (features_3d - min_vals) / range_vals
+    return (normalized * 255).astype(np.uint8)
+class SLatFlowMatchingTrainer(SparseFlowMatchingTrainer):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.cfg = kwargs.pop('cfg', None)
+        if self.cfg is None:
+            raise ValueError("Configuration dictionary 'cfg' must be provided.")
+        self.sampler = FlowEulerSampler(sigma_min=1.e-5)
+        self.device = torch.device("cuda")
+        # Based on PointNet Encoder setting
+        self.resolution = 128
+        self.condition_type = 'image'
+        self.is_cond = False
+        self.img_res = 518
+        self.feature_dim = self.cfg['model']['latent_dim']
+        self._init_components(
+            clip_model_path=self.cfg['training'].get('clip_model_path', None),
+            dinov2_model_path=self.cfg['training'].get('dinov2_model_path', None),
+            vae_path=self.cfg['training']['vae_path'],
+        )
+        # Classifier head removed as it is not part of the Active Voxel pipeline
+    def _load_denoiser(self, denoiser_checkpoint_path):
+        path = denoiser_checkpoint_path
+        if not path or not os.path.isfile(path):
+            print("No valid checkpoint path provided for fine-tuning. Starting from scratch.")
+            return
+        print(f"Loading checkpoint from: {path}")
+        checkpoint = torch.load(path, map_location=self.device)
+        try:
+            denoiser_state_dict = checkpoint['denoiser']
+            # Handle DDP prefix
+            if next(iter(denoiser_state_dict)).startswith('module.'):
+                denoiser_state_dict = {k[7:]: v for k, v in denoiser_state_dict.items()}
+            self.denoiser.load_state_dict(denoiser_state_dict)
+            print("Denoiser weights loaded successfully.")
+        except KeyError:
+            print("[WARN] 'denoiser' key not found in checkpoint. Skipping.")
+        except Exception as e:
+            print(f"[ERROR] Failed to load denoiser state_dict: {e}")
+    def _init_components(self,
+            clip_model_path=None,
+            dinov2_model_path=None,
+            vae_path=None,
+        ):
+        # 1. Initialize PointNet Voxel Encoder (Matches Training)
+        self.voxel_encoder = VoxelFeatureEncoder_active_pointnet(
+            in_channels=15,
+            hidden_dim=256,
+            out_channels=1024,
+            scatter_type='mean',
+            n_blocks=5,
+            resolution=128,
+            add_label=False,
+        ).to(self.device)
+        # 2. Initialize VAE
+        self.vae = VoxelVAE(
+            in_channels=self.cfg['model']['in_channels'],
+            latent_dim=self.cfg['model']['latent_dim'],
+            encoder_blocks=self.cfg['model']['encoder_blocks'],
+            decoder_blocks_vtx=self.cfg['model']['decoder_blocks_vtx'],
+            decoder_blocks_edge=self.cfg['model']['decoder_blocks_edge'],
+            num_heads=8,
+            num_head_channels=64,
+            mlp_ratio=4.0,
+            attn_mode="swin",
+            window_size=8,
+            pe_mode="ape",
+            use_fp16=False,
+            use_checkpoint=False,
+            qk_rms_norm=False,
+            using_subdivide=True,
+            using_attn=self.cfg['model']['using_attn'],
+            attn_first=self.cfg['model'].get('attn_first', True),
+            pred_direction=self.cfg['model'].get('pred_direction', False),
+        ).to(self.device)
+        # 3. Initialize Dataset with collate_fn_pointnet
+        self.dataset = VoxelVertexDataset_edge(
+            root_dir=self.cfg['dataset']['path'],
+            base_resolution=self.cfg['dataset']['base_resolution'],
+            min_resolution=self.cfg['dataset']['min_resolution'],
+            cache_dir=self.cfg['dataset']['cache_dir'],
+            renders_dir=self.cfg['dataset']['renders_dir'],
+            process_img=False,
+            active_voxel_res=128,
+            filter_active_voxels=self.cfg['dataset']['filter_active_voxels'],
+            cache_filter_path=self.cfg['dataset']['cache_filter_path'],
+            sample_type=self.cfg['dataset'].get('sample_type', 'dora'),
+        )
+        self.dataloader = DataLoader(
+            self.dataset,
+            batch_size=1,
+            shuffle=True,
+            collate_fn=partial(collate_fn_pointnet,), # Critical Change
+            num_workers=4,
+            pin_memory=True,
+            persistent_workers=True,
+        )
+        # 4. Load Pretrained Weights
+        # Assuming vae_path contains 'voxel_encoder' and 'vae'
+        print(f"Loading VAE/Encoder from {vae_path}")
+        ckpt = torch.load(vae_path, map_location='cpu')
+        # Load VAE
+        if 'vae' in ckpt:
+            self.vae.load_state_dict(ckpt['vae'], strict=False)
+        else:
+            self.vae.load_state_dict(ckpt) # Fallback
+        # Load Encoder
+        if 'voxel_encoder' in ckpt:
+            self.voxel_encoder.load_state_dict(ckpt['voxel_encoder'])
+        else:
+            print("[WARN] 'voxel_encoder' not found in checkpoint, random init (BAD for inference).")
+        self.voxel_encoder.eval()
+        self.vae.eval()
+        # 5. Initialize Conditioning Model
+        if self.condition_type == 'text':
+            self.tokenizer = AutoTokenizer.from_pretrained(clip_model_path)
+            self.condition_model = CLIPTextModel.from_pretrained(clip_model_path)
+        elif self.condition_type == 'image':
+            model_name = 'dinov2_vitl14_reg'
+            local_repo_path = "/gemini/user/private/zhaotianhao/dinov2_resources/dinov2-main"
+            weights_path = "/gemini/user/private/zhaotianhao/dinov2_resources/dinov2_vitl14_reg4_pretrain.pth"
+            dinov2_model = torch.hub.load(
+                repo_or_dir=local_repo_path,
+                model=model_name,
+                source='local',
+                pretrained=False
+            )
+            self.condition_model = dinov2_model
+            self.condition_model.load_state_dict(torch.load(weights_path))
+            self.image_cond_model_transform = transforms.Compose([
+                transforms.ToTensor(),
+                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+            ])
+        else:
+            raise ValueError(f"Unsupported condition type: {self.condition_type}")
+        self.condition_model.to(self.device).eval()
+    @torch.no_grad()
+    def encode_image(self, images) -> torch.Tensor:
+        if isinstance(images, torch.Tensor):
+            batch_tensor = images.to(self.device)
+        elif isinstance(images, list):
+            assert all(isinstance(i, Image.Image) for i in images)
+            image = [i.resize((518, 518), Image.LANCZOS) for i in images]
+            image = [np.array(i.convert('RGB')).astype(np.float32) / 255 for i in image]
+            image = [torch.from_numpy(i).permute(2, 0, 1).float() for i in image]
+            batch_tensor = torch.stack(image).to(self.device)
+        else:
+            raise ValueError(f"Unsupported type of image: {type(images)}")
+        if batch_tensor.shape[-2:] != (518, 518):
+             batch_tensor = F.interpolate(batch_tensor, (518, 518), mode='bicubic', align_corners=False)
+        features = self.condition_model(batch_tensor, is_training=True)['x_prenorm']
+        patchtokens = F.layer_norm(features, features.shape[-1:])
+        return patchtokens
+    def process_batch(self, batch):
+        preprocessed_images = batch['image']
+        cond_ = self.encode_image(preprocessed_images)
+        return cond_
+    def eval(self):
+        # Unconditional Setup
+        if self.is_cond == False:
+            if self.condition_type == 'text':
+                txt = ['']
+                encoding = self.tokenizer(txt, max_length=77, padding='max_length', truncation=True, return_tensors='pt')
+                tokens = encoding['input_ids'].to(self.device)
+                with torch.no_grad():
+                    cond_ = self.condition_model(input_ids=tokens).last_hidden_state
+            else:
+                blank_img = Image.fromarray(np.zeros((self.img_res, self.img_res, 3), dtype=np.uint8))
+                with torch.no_grad():
+                    dummy_cond = self.encode_image([blank_img])
+                    cond_ = torch.zeros_like(dummy_cond)
+                print(f"Generated unconditional image prompt (zero tensor) with shape: {cond_.shape}")
+        self.denoiser.eval()
+        # Load Denoiser Checkpoint
+        # Update this path to your ACTIVE VOXEL trained checkpoint
+        checkpoint_path = '/gemini/user/private/zhaotianhao/output_slat_flow_matching_active/ckpts/8780_complex_128to1024_rope/checkpoint_step90000_loss0_694290.pt'
+        self._load_denoiser(checkpoint_path)
+        filename = os.path.basename(checkpoint_path)
+        match = re.search(r'step(\d+)', filename)
+        step_str = match.group(1) if match else "eval"
+        save_dir = os.path.join(os.path.dirname(checkpoint_path), f"{step_str}_sample_active_vis_42seed_1000complex")
+        os.makedirs(save_dir, exist_ok=True)
+        print(f"Results will be saved to: {save_dir}")
+        for i, batch in enumerate(self.dataloader):
+            if i > 50: exit() # Visualize first 10
+            if self.is_cond and self.condition_type == 'image':
+                cond_ = self.process_batch(batch)
+            if cond_.shape[0] != 1:
+                cond_ = cond_.expand(batch['active_voxels_128'].shape[0], -1, -1).contiguous().to(self.device)
+            else:
+                cond_ = cond_.to(self.device)
+            # --- Data Retrieval (Matches collate_fn_pointnet) ---
+            point_cloud = batch['point_cloud_128'].to(self.device)
+            active_coords = batch['active_voxels_128'].to(self.device) # [N, 4]
+            with autocast(device_type='cuda', dtype=torch.bfloat16):
+                with torch.no_grad():
+                    # 1. Encode Ground Truth Latents
+                    active_voxel_feats = self.voxel_encoder(
+                        p=point_cloud,
+                        sparse_coords=active_coords,
+                        res=128,
+                        bbox_size=(-0.5, 0.5),
+                    )
+                    sparse_input = SparseTensor(
+                        feats=active_voxel_feats,
+                        coords=active_coords.int()
+                    )
+                    # Encode to get GT distribution
+                    gt_latents, posterior = self.vae.encode(sparse_input)
+                    print(f"Batch {i}: Active voxels: {active_coords.shape[0]}")
+                    # 2. Generation / Sampling
+                    # Generate noise on the SAME active coordinates
+                    noise = SparseTensor_trellis(
+                        coords=active_coords.int(),
+                        feats=torch.randn(
+                            active_coords.shape[0],
+                            self.feature_dim,
+                            device=self.device,
+                        ),
+                    )
+                    sample_results = self.sampler.sample(
+                        model=self.denoiser.float(),
+                        noise=noise.to(self.device).float(),
+                        cond=cond_.to(self.device).float(),
+                        steps=50,
+                        rescale_t=1.0,
+                        verbose=True,
+                    )
+                    generated_sparse_tensor = sample_results.samples
+                    generated_coords = generated_sparse_tensor.coords
+                    generated_features = generated_sparse_tensor.feats
+                    print('Gen features mean:', generated_features.mean().item(), 'std:', generated_features.std().item())
+                    print('GT features mean:', gt_latents.feats.mean().item(), 'std:', gt_latents.feats.std().item())
+                    print('MSE:', F.mse_loss(generated_features, gt_latents.feats).item())
+            # --- Visualization (PCA) ---
+            gt_feats_np = gt_latents.feats.detach().cpu().numpy()
+            gen_feats_np = generated_features.detach().cpu().numpy()
+            coords_np = active_coords[:, 1:4].detach().cpu().numpy() # x, y, z
+            print("Visualizing features using PCA...")
+            pca = PCA(n_components=3)
+            # Fit PCA on GT, transform both
+            pca.fit(gt_feats_np)
+            gt_feats_3d = pca.transform(gt_feats_np)
+            gen_feats_3d = pca.transform(gen_feats_np)
+            gt_colors = normalize_to_rgb(gt_feats_3d)
+            gen_colors = normalize_to_rgb(gen_feats_3d)
+            # Save PLYs
+            save_colored_ply(coords_np, gt_colors, os.path.join(save_dir, f"batch_{i}_gt_pca.ply"))
+            save_colored_ply(coords_np, gen_colors, os.path.join(save_dir, f"batch_{i}_gen_pca.ply"))
+            # Save Tensors for further analysis
+            torch.save(gt_latents, os.path.join(save_dir, f"gt_latent_{i}.pt"))
+            torch.save(batch, os.path.join(save_dir, f"gt_data_batch_{i}.pt"))
+            torch.save(sample_results.samples, os.path.join(save_dir, f"sample_latent_{i}.pt"))
+if __name__ == '__main__':
+    torch.manual_seed(42)
+    config_path = "/gemini/user/private/zhaotianhao/Triposf/config_slat_flow_128to1024_pointnet_test.yaml"
+    with open(config_path) as f:
+        cfg = yaml.safe_load(f)
+    # Initialize Model on CPU first
+    diffusion_model = SLatFlowModel(
+        resolution=cfg['flow']['resolution'],
+        in_channels=cfg['flow']['in_channels'],
+        out_channels=cfg['flow']['out_channels'],
+        model_channels=cfg['flow']['model_channels'],
+        cond_channels=cfg['flow']['cond_channels'],
+        num_blocks=cfg['flow']['num_blocks'],
+        num_heads=cfg['flow']['num_heads'],
+        mlp_ratio=cfg['flow']['mlp_ratio'],
+        patch_size=cfg['flow']['patch_size'],
+        num_io_res_blocks=cfg['flow']['num_io_res_blocks'],
+        io_block_channels=cfg['flow']['io_block_channels'],
+        pe_mode=cfg['flow']['pe_mode'],
+        qk_rms_norm=cfg['flow']['qk_rms_norm'],
+        qk_rms_norm_cross=cfg['flow']['qk_rms_norm_cross'],
+        use_fp16=cfg['flow'].get('use_fp16', False),
+    ).to("cuda" if torch.cuda.is_available() else "cpu")
+    trainer = SLatFlowMatchingTrainer(
+        denoiser=diffusion_model,
+        t_schedule=cfg['t_schedule'],
+        sigma_min=cfg['sigma_min'],
+        cfg=cfg,
+    )
+    trainer.eval()

test_slat_flow_128to256_pointnet.py ADDED Viewed

	@@ -0,0 +1,403 @@

+import os
+import numpy as np
+import torch
+import torch.nn as nn
+import yaml
+import time
+from datetime import datetime
+from torch.utils.data import DataLoader
+from functools import partial
+import torch.nn.functional as F
+from torch.amp import GradScaler, autocast
+from typing import *
+from transformers import CLIPTextModel, AutoTokenizer, CLIPTextConfig, Dinov2Model, AutoImageProcessor, Dinov2Config
+import torch
+import re
+from utils import load_pretrained_woself
+from dataset_triposf import VoxelVertexDataset_edge, collate_fn_pointnet
+from triposf.models.triposf_vae.VoxelFeatureVAE_edge_woself_128to1024_decoder_head import VoxelVAE
+from vertex_encoder import VoxelFeatureEncoder_active_pointnet
+from trellis.models.structured_latent_flow import SLatFlowModel
+from trellis.trainers.flow_matching.sparse_flow_matching_alone import SparseFlowMatchingTrainer
+from trellis.pipelines.samplers import FlowEulerSampler
+from safetensors.torch import load_file
+import open3d as o3d
+from PIL import Image
+from triposf.modules.sparse.basic import SparseTensor
+from trellis.modules.sparse.basic import SparseTensor as SparseTensor_trellis
+from triposf.modules.utils import DiagonalGaussianDistribution
+from sklearn.decomposition import PCA
+import trimesh
+import torchvision.transforms as transforms
+# --- Helper Functions ---
+def save_colored_ply(points, colors, filename):
+    if len(points) == 0:
+        print(f"[Warning] No points to save for {filename}")
+        return
+    # Ensure colors are uint8
+    if colors.max() <= 1.0:
+        colors = (colors * 255).astype(np.uint8)
+    colors = colors.astype(np.uint8)
+    # Add Alpha if missing
+    if colors.shape[1] == 3:
+        colors = np.hstack([colors, np.full((len(colors), 1), 255, dtype=np.uint8)])
+    cloud = trimesh.PointCloud(points, colors=colors)
+    cloud.export(filename)
+    print(f"Saved colored point cloud to {filename}")
+def normalize_to_rgb(features_3d):
+    min_vals = features_3d.min(axis=0)
+    max_vals = features_3d.max(axis=0)
+    range_vals = max_vals - min_vals
+    range_vals[range_vals == 0] = 1
+    normalized = (features_3d - min_vals) / range_vals
+    return (normalized * 255).astype(np.uint8)
+class SLatFlowMatchingTrainer(SparseFlowMatchingTrainer):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.cfg = kwargs.pop('cfg', None)
+        if self.cfg is None:
+            raise ValueError("Configuration dictionary 'cfg' must be provided.")
+        self.sampler = FlowEulerSampler(sigma_min=1.e-5)
+        self.device = torch.device("cuda")
+        # Based on PointNet Encoder setting
+        self.resolution = 128
+        self.condition_type = 'image'
+        self.is_cond = False
+        self.img_res = 518
+        self.feature_dim = self.cfg['model']['latent_dim']
+        self._init_components(
+            clip_model_path=self.cfg['training'].get('clip_model_path', None),
+            dinov2_model_path=self.cfg['training'].get('dinov2_model_path', None),
+            vae_path=self.cfg['training']['vae_path'],
+        )
+        # Classifier head removed as it is not part of the Active Voxel pipeline
+    def _load_denoiser(self, denoiser_checkpoint_path):
+        path = denoiser_checkpoint_path
+        if not path or not os.path.isfile(path):
+            print("No valid checkpoint path provided for fine-tuning. Starting from scratch.")
+            return
+        print(f"Loading checkpoint from: {path}")
+        checkpoint = torch.load(path, map_location=self.device)
+        try:
+            denoiser_state_dict = checkpoint['denoiser']
+            # Handle DDP prefix
+            if next(iter(denoiser_state_dict)).startswith('module.'):
+                denoiser_state_dict = {k[7:]: v for k, v in denoiser_state_dict.items()}
+            self.denoiser.load_state_dict(denoiser_state_dict)
+            print("Denoiser weights loaded successfully.")
+        except KeyError:
+            print("[WARN] 'denoiser' key not found in checkpoint. Skipping.")
+        except Exception as e:
+            print(f"[ERROR] Failed to load denoiser state_dict: {e}")
+    def _init_components(self,
+            clip_model_path=None,
+            dinov2_model_path=None,
+            vae_path=None,
+        ):
+        # 1. Initialize PointNet Voxel Encoder (Matches Training)
+        self.voxel_encoder = VoxelFeatureEncoder_active_pointnet(
+            in_channels=15,
+            hidden_dim=256,
+            out_channels=1024,
+            scatter_type='mean',
+            n_blocks=5,
+            resolution=128,
+            add_label=False,
+        ).to(self.device)
+        # 2. Initialize VAE
+        self.vae = VoxelVAE(
+            in_channels=self.cfg['model']['in_channels'],
+            latent_dim=self.cfg['model']['latent_dim'],
+            encoder_blocks=self.cfg['model']['encoder_blocks'],
+            decoder_blocks_vtx=self.cfg['model']['decoder_blocks_vtx'],
+            decoder_blocks_edge=self.cfg['model']['decoder_blocks_edge'],
+            num_heads=8,
+            num_head_channels=64,
+            mlp_ratio=4.0,
+            attn_mode="swin",
+            window_size=8,
+            pe_mode="ape",
+            use_fp16=False,
+            use_checkpoint=False,
+            qk_rms_norm=False,
+            using_subdivide=True,
+            using_attn=self.cfg['model']['using_attn'],
+            attn_first=self.cfg['model'].get('attn_first', True),
+            pred_direction=self.cfg['model'].get('pred_direction', False),
+        ).to(self.device)
+        # 3. Initialize Dataset with collate_fn_pointnet
+        self.dataset = VoxelVertexDataset_edge(
+            root_dir=self.cfg['dataset']['path'],
+            base_resolution=self.cfg['dataset']['base_resolution'],
+            min_resolution=self.cfg['dataset']['min_resolution'],
+            cache_dir=self.cfg['dataset']['cache_dir'],
+            renders_dir=self.cfg['dataset']['renders_dir'],
+            process_img=False,
+            active_voxel_res=128,
+            filter_active_voxels=self.cfg['dataset']['filter_active_voxels'],
+            cache_filter_path=self.cfg['dataset']['cache_filter_path'],
+            sample_type=self.cfg['dataset'].get('sample_type', 'dora'),
+        )
+        self.dataloader = DataLoader(
+            self.dataset,
+            batch_size=1,
+            shuffle=True,
+            collate_fn=partial(collate_fn_pointnet,), # Critical Change
+            num_workers=4,
+            pin_memory=True,
+            persistent_workers=True,
+        )
+        # 4. Load Pretrained Weights
+        # Assuming vae_path contains 'voxel_encoder' and 'vae'
+        print(f"Loading VAE/Encoder from {vae_path}")
+        ckpt = torch.load(vae_path, map_location='cpu')
+        # Load VAE
+        if 'vae' in ckpt:
+            self.vae.load_state_dict(ckpt['vae'], strict=False)
+        else:
+            self.vae.load_state_dict(ckpt) # Fallback
+        # Load Encoder
+        if 'voxel_encoder' in ckpt:
+            self.voxel_encoder.load_state_dict(ckpt['voxel_encoder'])
+        else:
+            print("[WARN] 'voxel_encoder' not found in checkpoint, random init (BAD for inference).")
+        self.voxel_encoder.eval()
+        self.vae.eval()
+        # 5. Initialize Conditioning Model
+        if self.condition_type == 'text':
+            self.tokenizer = AutoTokenizer.from_pretrained(clip_model_path)
+            self.condition_model = CLIPTextModel.from_pretrained(clip_model_path)
+        elif self.condition_type == 'image':
+            model_name = 'dinov2_vitl14_reg'
+            local_repo_path = "/gemini/user/private/zhaotianhao/dinov2_resources/dinov2-main"
+            weights_path = "/gemini/user/private/zhaotianhao/dinov2_resources/dinov2_vitl14_reg4_pretrain.pth"
+            dinov2_model = torch.hub.load(
+                repo_or_dir=local_repo_path,
+                model=model_name,
+                source='local',
+                pretrained=False
+            )
+            self.condition_model = dinov2_model
+            self.condition_model.load_state_dict(torch.load(weights_path))
+            self.image_cond_model_transform = transforms.Compose([
+                transforms.ToTensor(),
+                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+            ])
+        else:
+            raise ValueError(f"Unsupported condition type: {self.condition_type}")
+        self.condition_model.to(self.device).eval()
+    @torch.no_grad()
+    def encode_image(self, images) -> torch.Tensor:
+        if isinstance(images, torch.Tensor):
+            batch_tensor = images.to(self.device)
+        elif isinstance(images, list):
+            assert all(isinstance(i, Image.Image) for i in images)
+            image = [i.resize((518, 518), Image.LANCZOS) for i in images]
+            image = [np.array(i.convert('RGB')).astype(np.float32) / 255 for i in image]
+            image = [torch.from_numpy(i).permute(2, 0, 1).float() for i in image]
+            batch_tensor = torch.stack(image).to(self.device)
+        else:
+            raise ValueError(f"Unsupported type of image: {type(images)}")
+        if batch_tensor.shape[-2:] != (518, 518):
+             batch_tensor = F.interpolate(batch_tensor, (518, 518), mode='bicubic', align_corners=False)
+        features = self.condition_model(batch_tensor, is_training=True)['x_prenorm']
+        patchtokens = F.layer_norm(features, features.shape[-1:])
+        return patchtokens
+    def process_batch(self, batch):
+        preprocessed_images = batch['image']
+        cond_ = self.encode_image(preprocessed_images)
+        return cond_
+    def eval(self):
+        """
+        Sample latents with the denoiser on dataset batches and dump PCA visualisations.
+        For each batch the method encodes the point cloud with the voxel encoder
+        and VAE to obtain reference latents, samples latents from noise placed on
+        the same active voxel coordinates, prints summary statistics, and writes
+        PCA-coloured PLY files plus the raw tensors next to the checkpoint.
+        NOTE(review): when self.is_cond is true and self.condition_type is not
+        'image', nothing in this method assigns cond_ before it is read in the loop.
+        """
+        # Unconditional Setup
+        if self.is_cond == False:
+            if self.condition_type == 'text':
+                # Empty prompt padded to 77 tokens.
+                txt = ['']
+                encoding = self.tokenizer(txt, max_length=77, padding='max_length', truncation=True, return_tensors='pt')
+                tokens = encoding['input_ids'].to(self.device)
+                with torch.no_grad():
+                    cond_ = self.condition_model(input_ids=tokens).last_hidden_state
+            else:
+                # A black image is encoded only to obtain the token shape; the
+                # condition actually used is an all-zero tensor of that shape.
+                blank_img = Image.fromarray(np.zeros((self.img_res, self.img_res, 3), dtype=np.uint8))
+                with torch.no_grad():
+                    dummy_cond = self.encode_image([blank_img])
+                    cond_ = torch.zeros_like(dummy_cond)
+                print(f"Generated unconditional image prompt (zero tensor) with shape: {cond_.shape}")
+        self.denoiser.eval()
+        # Load Denoiser Checkpoint
+        # Update this path to your ACTIVE VOXEL trained checkpoint
+        checkpoint_path = '/gemini/user/private/zhaotianhao/checkpoints/output_slat_flow_matching_active/8w_128to256_head_rope/checkpoint_step215000_loss0_332666.pt'
+        self._load_denoiser(checkpoint_path)
+        # The output folder name is derived from the 'step<digits>' part of the file name.
+        filename = os.path.basename(checkpoint_path)
+        match = re.search(r'step(\d+)', filename)
+        step_str = match.group(1) if match else "eval"
+        save_dir = os.path.join(os.path.dirname(checkpoint_path), f"{step_str}_sample_active_vis_42seed_1000complex")
+        os.makedirs(save_dir, exist_ok=True)
+        print(f"Results will be saved to: {save_dir}")
+        for i, batch in enumerate(self.dataloader):
+            if i > 50: exit() # NOTE(review): terminates the process after batches 0..50 (51 batches)
+            if self.is_cond and self.condition_type == 'image':
+                cond_ = self.process_batch(batch)
+            # NOTE(review): expand() is taken when the leading dim is NOT 1, and the
+            # target size is the row count of active_voxels_128 - confirm this is intended.
+            if cond_.shape[0] != 1:
+                cond_ = cond_.expand(batch['active_voxels_128'].shape[0], -1, -1).contiguous().to(self.device)
+            else:
+                cond_ = cond_.to(self.device)
+            # --- Data Retrieval (Matches collate_fn_pointnet) ---
+            point_cloud = batch['point_cloud_128'].to(self.device)
+            active_coords = batch['active_voxels_128'].to(self.device) # [N, 4]
+            with autocast(device_type='cuda', dtype=torch.bfloat16):
+                with torch.no_grad():
+                    # 1. Encode Ground Truth Latents
+                    active_voxel_feats = self.voxel_encoder(
+                        p=point_cloud,
+                        sparse_coords=active_coords,
+                        res=128,
+                        bbox_size=(-0.5, 0.5),
+                    )
+                    sparse_input = SparseTensor(
+                        feats=active_voxel_feats,
+                        coords=active_coords.int()
+                    )
+                    # Encode to get GT distribution
+                    gt_latents, posterior = self.vae.encode(sparse_input)
+                    print(f"Batch {i}: Active voxels: {active_coords.shape[0]}")
+                    # 2. Generation / Sampling
+                    # Generate noise on the SAME active coordinates
+                    noise = SparseTensor_trellis(
+                        coords=active_coords.int(),
+                        feats=torch.randn(
+                            active_coords.shape[0],
+                            self.feature_dim,
+                            device=self.device,
+                        ),
+                    )
+                    # Model, noise and condition are all cast to float32 for sampling.
+                    sample_results = self.sampler.sample(
+                        model=self.denoiser.float(),
+                        noise=noise.to(self.device).float(),
+                        cond=cond_.to(self.device).float(),
+                        steps=50,
+                        rescale_t=1.0,
+                        verbose=True,
+                    )
+                    generated_sparse_tensor = sample_results.samples
+                    generated_coords = generated_sparse_tensor.coords
+                    generated_features = generated_sparse_tensor.feats
+                    print('Gen features mean:', generated_features.mean().item(), 'std:', generated_features.std().item())
+                    print('GT features mean:', gt_latents.feats.mean().item(), 'std:', gt_latents.feats.std().item())
+                    print('MSE:', F.mse_loss(generated_features, gt_latents.feats).item())
+            # --- Visualization (PCA) ---
+            gt_feats_np = gt_latents.feats.detach().cpu().numpy()
+            gen_feats_np = generated_features.detach().cpu().numpy()
+            coords_np = active_coords[:, 1:4].detach().cpu().numpy() # x, y, z
+            print("Visualizing features using PCA...")
+            pca = PCA(n_components=3)
+            # Fit PCA on GT, transform both
+            pca.fit(gt_feats_np)
+            gt_feats_3d = pca.transform(gt_feats_np)
+            gen_feats_3d = pca.transform(gen_feats_np)
+            gt_colors = normalize_to_rgb(gt_feats_3d)
+            gen_colors = normalize_to_rgb(gen_feats_3d)
+            # Save PLYs
+            save_colored_ply(coords_np, gt_colors, os.path.join(save_dir, f"batch_{i}_gt_pca.ply"))
+            save_colored_ply(coords_np, gen_colors, os.path.join(save_dir, f"batch_{i}_gen_pca.ply"))
+            # Save Tensors for further analysis
+            torch.save(gt_latents, os.path.join(save_dir, f"gt_latent_{i}.pt"))
+            torch.save(batch, os.path.join(save_dir, f"gt_data_batch_{i}.pt"))
+            torch.save(sample_results.samples, os.path.join(save_dir, f"sample_latent_{i}.pt"))
+if __name__ == '__main__':
+    torch.manual_seed(42)
+    config_path = "/gemini/user/private/zhaotianhao/Triposf/config_slat_flow_128to256_pointnet_test.yaml"
+    with open(config_path) as f:
+        cfg = yaml.safe_load(f)
+    # Initialize Model on CPU first
+    diffusion_model = SLatFlowModel(
+        resolution=cfg['flow']['resolution'],
+        in_channels=cfg['flow']['in_channels'],
+        out_channels=cfg['flow']['out_channels'],
+        model_channels=cfg['flow']['model_channels'],
+        cond_channels=cfg['flow']['cond_channels'],
+        num_blocks=cfg['flow']['num_blocks'],
+        num_heads=cfg['flow']['num_heads'],
+        mlp_ratio=cfg['flow']['mlp_ratio'],
+        patch_size=cfg['flow']['patch_size'],
+        num_io_res_blocks=cfg['flow']['num_io_res_blocks'],
+        io_block_channels=cfg['flow']['io_block_channels'],
+        pe_mode=cfg['flow']['pe_mode'],
+        qk_rms_norm=cfg['flow']['qk_rms_norm'],
+        qk_rms_norm_cross=cfg['flow']['qk_rms_norm_cross'],
+        use_fp16=cfg['flow'].get('use_fp16', False),
+    ).to("cuda" if torch.cuda.is_available() else "cpu")
+    trainer = SLatFlowMatchingTrainer(
+        denoiser=diffusion_model,
+        t_schedule=cfg['t_schedule'],
+        sigma_min=cfg['sigma_min'],
+        cfg=cfg,
+    )
+    trainer.eval()

test_slat_vae_128to1024_pointnet.py ADDED Viewed

The diff for this file is too large to render. See raw diff

test_slat_vae_128to1024_pointnet_vae.py ADDED Viewed

The diff for this file is too large to render. See raw diff

test_slat_vae_128to1024_pointnet_vae_addhead.py ADDED Viewed

The diff for this file is too large to render. See raw diff

test_slat_vae_128to1024_pointnet_vae_head.py ADDED Viewed

	@@ -0,0 +1,1339 @@

+import os
+import yaml
+import torch
+import numpy as np
+import random
+from tqdm import tqdm
+from collections import defaultdict
+import plotly.graph_objects as go
+from plotly.subplots import make_subplots
+from torch.utils.data import DataLoader, Subset
+from triposf.modules.sparse.basic import SparseTensor
+from triposf.models.triposf_vae.VoxelFeatureVAE_edge_woself_128to1024_decoder_head import VoxelVAE
+from vertex_encoder import VoxelFeatureEncoder_edge, VoxelFeatureEncoder_vtx, VoxelFeatureEncoder_active, VoxelFeatureEncoder_active_pointnet, ConnectionHead
+from utils import load_pretrained_woself
+from dataset_triposf_head import VoxelVertexDataset_edge, collate_fn_pointnet
+from functools import partial
+import itertools
+from typing import List, Tuple, Set
+from collections import OrderedDict
+from scipy.spatial import cKDTree
+from sklearn.neighbors import KDTree
+import trimesh
+import torch
+import torch.nn.functional as F
+import time
+from sklearn.decomposition import PCA
+import matplotlib.pyplot as plt
+import networkx as nx
+def predict_mesh_connectivity(
+    connection_head,
+    vtx_feats,
+    vtx_coords,
+    batch_size=10000,
+    threshold=0.5,
+    k_neighbors=64, # 限制每个点只检测最近的 K 个邻居，设为 -1 则全连接检测
+    device='cuda'
+):
+    """
+    Args:
+        connection_head: 训练好的 MLP 模型
+        vtx_feats: [N, C] 顶点特征
+        vtx_coords: [N, 3] 顶点坐标 (用于 KNN 筛选候选边)
+        batch_size: MLP 推理的 batch size
+        threshold: 判定连接的概率阈值
+        k_neighbors: K-NN 数量。如果是 None 或 -1，则检测所有 N*(N-1)/2 对。
+    """
+    num_verts = vtx_feats.shape[0]
+    if num_verts < 3:
+        return [], [] # 无法构成三角形
+    connection_head.eval()
+    # --- 1. 生成候选边 (Candidate Edges) ---
+    if k_neighbors is not None and k_neighbors > 0 and k_neighbors < num_verts:
+        # 策略 A: 局部 KNN (推荐)
+        # 计算距离矩阵可能会 OOM，使用分块或 KDTree/Faiss，这里用 PyTorch 的 cdist 分块简化版
+        # 或者直接暴力 cdist 如果 N < 10000
+        # 为了简单且高效，这里演示简单的 cdist (注意显存)
+        # 如果 N 很大 (>5000)，建议使用 faiss 或 scipy.spatial.cKDTree
+        dist_mat = torch.cdist(vtx_coords.float(), vtx_coords.float()) # [N, N]
+        # 取 topk (smallest distance)，排除自己
+        # values: [N, K], indices: [N, K]
+        _, indices = torch.topk(dist_mat, k=k_neighbors + 1, dim=1, largest=False)
+        neighbor_indices = indices[:, 1:] # 去掉第一列（自己）
+        # 构建 source, target 索引
+        src = torch.arange(num_verts, device=device).unsqueeze(1).repeat(1, k_neighbors).flatten()
+        dst = neighbor_indices.flatten()
+        # 此时得到的边是双向的 (u->v 和 v->u 可能都存在)，为了效率可以去重
+        # 但为了利用你的 symmetric MLP，保留双向或者只保留 u < v 均可
+        # 这里为了简单，我们生成 u < v 的 mask
+        mask = src < dst
+        u_indices = src[mask]
+        v_indices = dst[mask]
+    else:
+        # 策略 B: 全连接 (O(N^2)) - 仅当 N 较小时使用
+        u_indices, v_indices = torch.triu_indices(num_verts, num_verts, offset=1, device=device)
+    # --- 2. 批量推理 ---
+    all_probs = []
+    num_candidates = u_indices.shape[0]
+    with torch.no_grad():
+        for i in range(0, num_candidates, batch_size):
+            end = min(i + batch_size, num_candidates)
+            batch_u = u_indices[i:end]
+            batch_v = v_indices[i:end]
+            feat_u = vtx_feats[batch_u]
+            feat_v = vtx_feats[batch_v]
+            # Symmetric Forward (和你训练时保持一致)
+            # A -> B
+            input_uv = torch.cat([feat_u, feat_v], dim=-1)
+            logits_uv = connection_head(input_uv)
+            # B -> A
+            input_vu = torch.cat([feat_v, feat_u], dim=-1)
+            logits_vu = connection_head(input_vu)
+            # Sum logits
+            logits = (logits_uv + logits_vu) / 2.
+            probs = torch.sigmoid(logits)
+            all_probs.append(probs)
+    all_probs = torch.cat(all_probs).squeeze() # [M]
+    # --- 3. 筛选连接边 ---
+    connected_mask = all_probs > threshold
+    final_u = u_indices[connected_mask].cpu().numpy()
+    final_v = v_indices[connected_mask].cpu().numpy()
+    edges = np.stack([final_u, final_v], axis=1) # [E, 2]
+    return edges
+def build_triangles_from_edges(edges, num_verts):
+    """
+    从边列表构建三角形。
+    寻找图中所有的 3-Cliques (三元环)。
+    这在图论中是一个经典问题，可以使用 networkx 库。
+    """
+    if len(edges) == 0:
+        return np.empty((0, 3), dtype=int)
+    G = nx.Graph()
+    G.add_nodes_from(range(num_verts))
+    G.add_edges_from(edges)
+    # 寻找所有的 3-cliques (三角形)
+    # enumerate_all_cliques 返回所有大小的 clique，我们需要过滤大小为 3 的
+    # 或者使用 nx.triangles ? 不，那个只返回数量
+    # 使用 nx.enumerate_all_cliques 效率可能较低，对于稀疏图还可以
+    # 更快的方法：迭代每条边 (u, v)，查找 u 和 v 的公共邻居 w
+    triangles = []
+    adj = [set(G.neighbors(n)) for n in range(num_verts)]
+    # 为了避免重复 (u, v, w), (v, w, u)... 我们可以强制 u < v < w
+    # 既然 edges 已经是 u < v (如果我们之前做了 triu)，则只需要找 w > v 且 w in adj[u]
+    # 优化算法：
+    for u, v in edges:
+        if u > v: u, v = v, u # 确保有序
+        # 找公共邻居
+        common = adj[u].intersection(adj[v])
+        for w in common:
+            if w > v: # 强制顺序 u < v < w 防止重复
+                triangles.append([u, v, w])
+    return np.array(triangles)
+def downsample_voxels(
+    voxels: torch.Tensor,
+    input_resolution: int,
+    output_resolution: int
+) -> torch.Tensor:
+    if input_resolution % output_resolution != 0:
+        raise ValueError(f"input_resolution ({input_resolution}) must be divisible "
+                         f"by output_resolution ({output_resolution}).")
+    factor = input_resolution // output_resolution
+    downsampled_voxels = voxels.clone().to(torch.long)
+    downsampled_voxels[:, 1:] //= factor
+    unique_downsampled_voxels = torch.unique(downsampled_voxels, dim=0)
+    return unique_downsampled_voxels
+def visualize_colored_points_ply(coords, vectors, filename):
+    """
+    可视化点云，并用向量方向的颜色来表示，保存为 PLY 文件。
+    Args:
+        coords (torch.Tensor or np.ndarray): 3D坐标，形状为 (N, 3)。
+        vectors (torch.Tensor or np.ndarray): 方向向量，形状为 (N, 3)。
+        filename (str): 保存输出文件的名称，必须是 .ply 格式。
+    """
+    # 确保输入是 numpy 数组
+    if isinstance(coords, torch.Tensor):
+        coords = coords.detach().cpu().numpy()
+    if isinstance(vectors, torch.Tensor):
+        vectors = vectors.detach().cpu().to(torch.float32).numpy()
+    # 检查输入数据是否为空，防止崩溃
+    if coords.size == 0 or vectors.size == 0:
+        print(f"警告：输入数据为空，未生成 {filename} 文件。")
+        return
+    # 将向量分量从 [-1, 1] 映射到 [0, 255]
+    # np.clip 用于将数值限制在 -1 和 1 之间，防止颜色溢出
+    # (vectors + 1) 将范围从 [-1, 1] 移动到 [0, 2]
+    # * 127.5 将范围从 [0, 2] 缩放到 [0, 255]
+    colors = np.clip((vectors + 1) * 127.5, 0, 255).astype(np.uint8)
+    # 创建一个点云对象，并传入颜色信息
+    # trimesh.PointCloud 能够自动处理带颜色的点
+    points = trimesh.points.PointCloud(coords, colors=colors)
+    # 导出为 PLY 文件
+    points.export(filename, file_type='ply')
+    print(f"可视化文件已成功保存为: {filename}")
+def compute_vertex_matching(pred_coords, gt_coords, threshold=1.0):
+    """
+    使用 KDTree 最近邻 + 贪心匹配算法计算顶点匹配 (欧式距离)
+    参数:
+        pred_coords: 预测坐标 (Tensor)
+        gt_coords: 真实坐标 (Tensor)
+        threshold: 匹配误差阈值 (默认1.0)
+    返回:
+        matches: 匹配成功的顶点数量
+        match_rate: 匹配率 (基于真实顶点数)
+        pred_total: 预测顶点总数
+        gt_total: 真实顶点总数
+    """
+    # 转换为整数坐标并去重
+    print('len(pred_coords)', len(pred_coords))
+    pred_array = np.unique(pred_coords.detach().to(torch.float32).cpu().numpy(), axis=0)
+    gt_array = np.unique(gt_coords.detach().cpu().to(torch.float32).numpy(), axis=0)
+    print('len(pred_array)', len(pred_array))
+    pred_total = len(pred_array)
+    gt_total = len(gt_array)
+    # 如果没有点，直接返回
+    if pred_total == 0 or gt_total == 0:
+        return 0, 0.0, pred_total, gt_total
+    # 建立 KDTree（以 gt 为基准）
+    tree = KDTree(gt_array)
+    # 查找预测点到最近的 gt 点
+    dist, indices = tree.query(pred_array, k=1)
+    dist = dist.squeeze()
+    indices = indices.squeeze()
+    # 贪心去重：确保 1 对 1
+    matches = 0
+    used_gt = set()
+    for d, idx in zip(dist, indices):
+        if d <= threshold and idx not in used_gt:
+            matches += 1
+            used_gt.add(idx)
+    match_rate = matches / max(gt_total, 1)
+    return matches, match_rate, pred_total, gt_total
+def flatten_coords_3d(coords_3d: torch.Tensor):
+    coords_3d_long = coords_3d #.long()
+    base_x = 1024
+    base_y = 1024 * 1024
+    base_z = 1024 * 1024 * 1024
+    flat_coords = coords_3d_long[:, 0] * base_z + \
+                  coords_3d_long[:, 1] * base_y + \
+                  coords_3d_long[:, 2] * base_x
+    return flat_coords
+class Tester:
+    def __init__(self, ckpt_path, config_path=None, dataset_path=None):
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self.ckpt_path = ckpt_path
+        self.config = self._load_config(config_path)
+        self.dataset_path = dataset_path # or self.config['dataset']['path']
+        checkpoint = torch.load(self.ckpt_path, map_location='cpu')
+        self.epoch = checkpoint.get('epoch', 0)
+        self._init_models()
+        self._init_dataset()
+        self.result_dir = os.path.join(os.path.dirname(ckpt_path), "evaluation_results")
+        os.makedirs(self.result_dir, exist_ok=True)
+        dataset_name_clean = os.path.basename(self.dataset_path).replace('.npz', '').replace('.npy', '')
+        self.output_voxel_dir = os.path.join(os.path.dirname(ckpt_path),
+                                             f"epoch_{self.epoch}_{dataset_name_clean}_voxels_0_gs")
+        os.makedirs(self.output_voxel_dir, exist_ok=True)
+        self.output_obj_dir = os.path.join(os.path.dirname(ckpt_path),
+                                           f"epoch_{self.epoch}_{dataset_name_clean}_obj_0_gs")
+        os.makedirs(self.output_obj_dir, exist_ok=True)
+    def _save_voxel_ply(self, coords: torch.Tensor, labels: torch.Tensor, filename: str):
+        if coords.numel() == 0:
+            return
+        coords_np = coords.cpu().to(torch.float32).numpy()
+        labels_np = labels.cpu().to(torch.float32).numpy()
+        colors = np.zeros((coords_np.shape[0], 3), dtype=np.uint8)
+        colors[labels_np == 0] = [255, 0, 0]
+        colors[labels_np == 1] = [0, 0, 255]
+        try:
+            import trimesh
+            point_cloud = trimesh.PointCloud(vertices=coords_np, colors=colors)
+            ply_path = os.path.join(self.output_voxel_dir, f"{filename}.ply")
+            point_cloud.export(ply_path)
+        except ImportError:
+            ply_path = os.path.join(self.output_voxel_dir, f"{filename}.ply")
+            with open(ply_path, 'w') as f:
+                f.write("ply\n")
+                f.write("format ascii 1.0\n")
+                f.write(f"element vertex {coords_np.shape[0]}\n")
+                f.write("property float x\n")
+                f.write("property float y\n")
+                f.write("property float z\n")
+                f.write("property uchar red\n")
+                f.write("property uchar green\n")
+                f.write("property uchar blue\n")
+                f.write("end_header\n")
+                for i in range(coords_np.shape[0]):
+                    f.write(f"{coords_np[i,0]} {coords_np[i,1]} {coords_np[i,2]} {colors[i,0]} {colors[i,1]} {colors[i,2]}\n")
+    def _load_config(self, config_path=None):
+        if config_path and os.path.exists(config_path):
+            with open(config_path) as f:
+                return yaml.safe_load(f)
+        ckpt_dir = os.path.dirname(self.ckpt_path)
+        possible_configs = [
+            os.path.join(ckpt_dir, "config.yaml"),
+            os.path.join(os.path.dirname(ckpt_dir), "config.yaml")
+        ]
+        for config_file in possible_configs:
+            if os.path.exists(config_file):
+                with open(config_file) as f:
+                    print(f"Loaded config from: {config_file}")
+                    return yaml.safe_load(f)
+        checkpoint = torch.load(self.ckpt_path, map_location='cpu')
+        if 'config' in checkpoint:
+            print("Loaded config from checkpoint")
+            return checkpoint['config']
+        raise FileNotFoundError("Could not find config_edge.yaml in checkpoint directory or parent, and config not saved in checkpoint.")
+    def _init_models(self):
+        self.voxel_encoder = VoxelFeatureEncoder_active_pointnet(
+            in_channels=15,
+            hidden_dim=256,
+            out_channels=1024,
+            scatter_type='mean',
+            n_blocks=5,
+            resolution=128,
+        ).to(self.device)
+        self.connection_head = ConnectionHead(
+            channels=32 * 2,
+            out_channels=1,
+            mlp_ratio=16,
+        ).to(self.device)
+        self.vae = VoxelVAE( # abalation: VoxelVAE_1volume_dilation
+            in_channels=self.config['model']['in_channels'],
+            latent_dim=self.config['model']['latent_dim'],
+            encoder_blocks=self.config['model']['encoder_blocks'],
+            # decoder_blocks=self.config['model']['decoder_blocks'],
+            decoder_blocks_vtx=self.config['model']['decoder_blocks_vtx'],
+            decoder_blocks_edge=self.config['model']['decoder_blocks_edge'],
+            num_heads=8,
+            num_head_channels=64,
+            mlp_ratio=4.0,
+            attn_mode="swin",
+            window_size=8,
+            pe_mode="ape",
+            use_fp16=False,
+            use_checkpoint=False,
+            qk_rms_norm=False,
+            using_subdivide=True,
+            using_attn=self.config['model']['using_attn'],
+            attn_first=self.config['model'].get('attn_first', True),
+            pred_direction=self.config['model'].get('pred_direction', False),
+        ).to(self.device)
+        load_pretrained_woself(
+            checkpoint_path=self.ckpt_path,
+            voxel_encoder=self.voxel_encoder,
+            connection_head=self.connection_head,
+            vae=self.vae,
+        )
+        # --- 【新增】在这里添加权重检查逻辑 ---
+        print(f"--- 正在检查权重文件中的 NaN/Inf 值... ---")
+        has_nan_inf = False
+        if self._check_weights_for_nan_inf(self.vae, "VoxelVAE"):
+            has_nan_inf = True
+        if self._check_weights_for_nan_inf(self.voxel_encoder, "Vertex Encoder"):
+            has_nan_inf = True
+        if self._check_weights_for_nan_inf(self.connection_head, "Connection Head"):
+            has_nan_inf = True
+        if not has_nan_inf:
+            print("--- 权重检查通过。未发现 NaN/Inf 值。 ---")
+        else:
+            # 如果发现坏值，直接抛出异常，因为评估无法继续
+            raise ValueError(f"在检查点 '{self.ckpt_path}' 中发现了 NaN 或 Inf 值。请检查导致训练不稳定的权重文件。")
+        # --- 检查逻辑结束 ---
+        self.vae.eval()
+        self.voxel_encoder.eval()
+        self.connection_head.eval()
+    def _init_dataset(self):
+        self.dataset = VoxelVertexDataset_edge(
+            root_dir=self.dataset_path,
+            base_resolution=self.config['dataset']['base_resolution'],
+            min_resolution=self.config['dataset']['min_resolution'],
+            cache_dir='/gemini/user/private/zhaotianhao/dataset_cache/test_15c_dora_128to1024',
+            # cache_dir=self.config['dataset']['cache_dir'],
+            renders_dir=self.config['dataset']['renders_dir'],
+            # filter_active_voxels=self.config['dataset']['filter_active_voxels'],
+            filter_active_voxels=False,
+            cache_filter_path=self.config['dataset']['cache_filter_path'],
+            sample_type=self.config['dataset']['sample_type'],
+            active_voxel_res=128,
+            pc_sample_number=819200,
+        )
+        self.dataloader = DataLoader(
+            self.dataset,
+            batch_size=1,
+            shuffle=False,
+            collate_fn=partial(collate_fn_pointnet),
+            num_workers=0,
+            pin_memory=True,
+            # prefetch_factor=4,
+        )
+    def _check_weights_for_nan_inf(self, model: torch.nn.Module, model_name: str) -> bool:
+        """
+        检查模型的所有参数中是否存在 NaN 或 Inf 值。
+        Args:
+            model (torch.nn.Module): 要检查的模型。
+            model_name (str): 模型的名称，用于打印日志。
+        Returns:
+            bool: 如果找到 NaN 或 Inf，则返回 True，否则返回 False。
+        """
+        found_issue = False
+        for name, param in model.named_parameters():
+            if torch.isnan(param.data).any():
+                print(f"[!!!] 严重错误: 在模型 '{model_name}' 的参数 '{name}' 中发现 NaN 值！")
+                found_issue = True
+            if torch.isinf(param.data).any():
+                print(f"[!!!] 严重错误: 在模型 '{model_name}' 的参数 '{name}' 中发现 Inf 值！")
+                found_issue = True
+        return found_issue
+    def _compute_vertex_metrics(self, pred_coords, gt_coords, threshold=1.0):
+        """
+        修改后的函数，确保一对一匹配，并优先匹配最近的点对。
+        """
+        pred_array = np.unique(pred_coords.round().int().cpu().numpy(), axis=0)
+        gt_array = np.unique(gt_coords.round().int().cpu().numpy(), axis=0)
+        pred_total = len(pred_array)
+        gt_total = len(gt_array)
+        if pred_total == 0 or gt_total == 0:
+            return {
+                'recall': 0.0,
+                'precision': 0.0,
+                'f1': 0.0,
+                'matches': 0,
+                'pred_count': pred_total,
+                'gt_count': gt_total
+            }
+        # 依然在预测点上构建KD-Tree，为每个真实点查找最近的预测点
+        tree = cKDTree(pred_array)
+        dists, pred_idxs = tree.query(gt_array, k=1)
+        # --- 核心修改部分 ---
+        # 1. 创建一个列表，包含 (距离, 真实点索引, 预测点索引)
+        #    这样我们就可以按距离对所有可能的匹配进行排序
+        possible_matches = []
+        for gt_idx, (dist, pred_idx) in enumerate(zip(dists, pred_idxs)):
+            if dist <= threshold:
+                possible_matches.append((dist, gt_idx, pred_idx))
+        # 2. 按距离从小到大排序（贪心策略）
+        possible_matches.sort(key=lambda x: x[0])
+        matches = 0
+        # 使用集合来跟踪已经使用过的预测点和真实点，确保一对一匹配
+        used_pred_indices = set()
+        used_gt_indices = set() # 虽然当前逻辑下gt不会重复，但加上更严谨
+        # 3. 遍历排序后的可能匹配，进行一对一分配
+        for dist, gt_idx, pred_idx in possible_matches:
+            # 如果这个预测点和这个真实点都还没有被使用过
+            if pred_idx not in used_pred_indices and gt_idx not in used_gt_indices:
+                matches += 1
+                used_pred_indices.add(pred_idx)
+                used_gt_indices.add(gt_idx)
+        # --- 修改结束 ---
+        # matches 现在是真正的 True Positives 数量，它绝不会超过 pred_total 或 gt_total
+        recall = matches / gt_total if gt_total > 0 else 0.0
+        precision = matches / pred_total if pred_total > 0 else 0.0
+        # 计算F1时，使用标准的 Precision 和 Recall 定义
+        if (precision + recall) == 0:
+            f1 = 0.0
+        else:
+            f1 = 2 * (precision * recall) / (precision + recall)
+        return {
+            'recall': recall,
+            'precision': precision,
+            'f1': f1,
+            'matches': matches,
+            'pred_count': pred_total,
+            'gt_count': gt_total
+        }
+    def _compute_vertex_metrics(self, pred_coords, gt_coords, threshold=1.0):
+        """
+        一个折衷的顶点指标计算方案。
+        它沿用“为每个真实点寻找最近预测点”的逻辑，
+        但通过修正计算方式，确保Precision和F1值不会超过1.0。
+        """
+        # 假设 pred_coords 和 gt_coords 是 PyTorch 张量
+        pred_array = np.unique(pred_coords.round().int().cpu().numpy(), axis=0)
+        gt_array = np.unique(gt_coords.round().int().cpu().numpy(), axis=0)
+        pred_total = len(pred_array)
+        gt_total = len(gt_array)
+        if pred_total == 0 or gt_total == 0:
+            return {
+                'recall': 0.0,
+                'precision': 0.0,
+                'f1': 0.0,
+                'matches': 0,
+                'pred_count': pred_total,
+                'gt_count': gt_total
+            }
+        # 在预测点上构建KD-Tree，为每个真实点查找最近的预测点
+        tree = cKDTree(pred_array)
+        dists, _ = tree.query(gt_array, k=1) # 我们在这里其实不需要 pred 的索引
+        # 1. 计算从 gt 角度出发的匹配数 (True Positives for Recall)
+        #    这和您的第一个函数完全一样。
+        #    这个值代表了“有多少个真实点被成功找到了”。
+        matches_from_gt = np.sum(dists <= threshold)
+        # 2. 计算 Recall (召回率)
+        #    召回率的分母是真实点的总数，所以这里的计算是合理的。
+        recall = matches_from_gt / gt_total if gt_total > 0 else 0.0
+        # 3. 计算 Precision (精确率) - ✅ 这是核心修正点
+        #    精确率的分母是预测点的总数。
+        #    分子（True Positives）不能超过预测点的总数。
+        #    因此，我们取 matches_from_gt 和 pred_total 中的较小值。
+        #    这解决了 Precision > 1 的问题。
+        tp_for_precision = min(matches_from_gt, pred_total)
+        precision = tp_for_precision / pred_total if pred_total > 0 else 0.0
+        # 4. 使用标准的F1分数公式
+        #    您原来的 F1 公式 `2 * matches / (pred + gt)` 是 L1-Score，
+        #    更常用的是基于 Precision 和 Recall 的调和平均数。
+        if (precision + recall) == 0:
+            f1 = 0.0
+        else:
+            f1 = 2 * (precision * recall) / (precision + recall)
+        return {
+            'recall': recall,
+            'precision': precision,
+            'f1': f1,
+            'matches': matches_from_gt, # 仍然报告原始的匹配数，便于观察
+            'pred_count': pred_total,
+            'gt_count': gt_total
+        }
+    def _compute_chamfer_distance(self, p1: torch.Tensor, p2: torch.Tensor, one_sided: bool = False):
+        if len(p1) == 0 or len(p2) == 0:
+            return float('nan')
+        dist_p1_p2 = torch.min(torch.cdist(p1, p2), dim=1)[0].mean()
+        if one_sided:
+            return dist_p1_p2.item()
+        else:
+            dist_p2_p1 = torch.min(torch.cdist(p2, p1), dim=1)[0].mean()
+            return (dist_p1_p2 + dist_p2_p1).item() / 2
+    def visualize_latent_space_pca(self, sample_idx: int):
+        """
+        Encodes a sample, performs PCA on its latent features, and saves a
+        colored PLY file for visualization.
+        The position of each point in the PLY file corresponds to the spatial
+        location in the latent grid.
+        The color of each point represents the first three principal components
+        of its feature vector.
+        """
+        print(f"--- Starting Latent Space PCA Visualization for Sample {sample_idx} ---")
+        self.vae.eval()
+        try:
+            # 1. Get the latent representation for the sample
+            latent = self._get_latent_for_sample(sample_idx)
+        except ValueError as e:
+            print(f"Error: {e}")
+            return
+        latent_coords = latent.coords.detach().cpu().numpy()
+        latent_feats = latent.feats.detach().cpu().numpy()
+        if latent_feats.shape[0] < 3:
+            print(f"Warning: Not enough latent points ({latent_feats.shape[0]}) to perform PCA. Skipping.")
+            return
+        print(f"--> Performing PCA on {latent_feats.shape[0]} latent vectors of dimension {latent_feats.shape[1]}...")
+        # 2. Perform PCA to reduce feature dimensions to 3
+        pca = PCA(n_components=3)
+        pca_features = pca.fit_transform(latent_feats)
+        print(f"    Explained variance ratio by 3 components: {pca.explained_variance_ratio_}")
+        print(f"    Total explained variance: {np.sum(pca.explained_variance_ratio_):.4f}")
+        # 3. Normalize the PCA components to be used as RGB colors [0, 255]
+        # We normalize each component independently to maximize color contrast
+        normalized_colors = np.zeros_like(pca_features)
+        for i in range(3):
+            min_val = pca_features[:, i].min()
+            max_val = pca_features[:, i].max()
+            if max_val - min_val > 1e-6:
+                normalized_colors[:, i] = (pca_features[:, i] - min_val) / (max_val - min_val)
+            else:
+                normalized_colors[:, i] = 0.5 # Handle case of constant value
+        colors_uint8 = (normalized_colors * 255).astype(np.uint8)
+        # 4. Prepare spatial coordinates for the point cloud
+        # latent_coords is (batch_idx, x, y, z), we want the xyz part
+        spatial_coords = latent_coords[:, 1:]
+        # 5. Create and save the colored PLY file
+        try:
+            # Create a Trimesh PointCloud object
+            point_cloud = trimesh.points.PointCloud(vertices=spatial_coords, colors=colors_uint8)
+            # Define the output filename
+            filename = f"sample_{sample_idx}_latent_pca.ply"
+            ply_path = os.path.join(self.output_voxel_dir, filename)
+            # Export the file
+            point_cloud.export(ply_path)
+            print(f"--> Successfully saved PCA visualization to: {ply_path}")
+        except Exception as e:
+            print(f"Error during Trimesh export: {e}")
+            print("Please ensure 'trimesh' is installed correctly.")
+    def _get_latent_for_sample(self, sample_idx: int) -> SparseTensor:
+        """
+        Encodes a single sample and returns its latent representation.
+        """
+        print(f"--> Encoding sample {sample_idx} to get its latent vector...")
+        # Get data for the specified sample
+        batch_data = self.dataset[sample_idx]
+        if batch_data is None:
+            raise ValueError(f"Sample at index {sample_idx} could not be loaded.")
+        # Use the collate function to form a batch
+        batch_data = collate_fn_pointnet([batch_data])
+        with torch.no_grad():
+            # 1. Get input data and move to device
+            gt_vertex_voxels_1024 = batch_data['gt_vertex_voxels_1024'].to(self.device)
+            combined_voxels_1024 = batch_data['combined_voxels_1024'].to(self.device)
+            active_coords = batch_data['active_voxels_128'].to(self.device)
+            point_cloud = batch_data['point_cloud_128'].to(self.device)
+            vtx_128 = downsample_voxels(gt_vertex_voxels_1024, input_resolution=1024, output_resolution=128)
+            edge_128 = downsample_voxels(combined_voxels_1024, input_resolution=1024, output_resolution=128)
+            active_voxel_feats = self.voxel_encoder(
+                p=point_cloud,
+                sparse_coords=active_coords,
+                res=128,
+                bbox_size=(-0.5, 0.5),
+                # voxel_label=active_labels,
+            )
+            sparse_input = SparseTensor(
+                feats=active_voxel_feats,
+                coords=active_coords.int()
+            )
+            # 2. Encode to get the latent representation
+            latent_128, posterior = self.vae.encode(sparse_input, sample_posterior=True,)
+            print(f"    Latent for sample {sample_idx} obtained. Shape: {latent_128.feats.shape}")
+            return latent_128
+    def evaluate(self, num_samples=None, visualize=False, chamfer_threshold=0.9, threshold=1.):
+        total_samples = len(self.dataset)
+        eval_samples = min(num_samples or total_samples, total_samples)
+        # sample_indices = random.sample(range(total_samples), eval_samples) if num_samples else range(total_samples)
+        sample_indices = range(eval_samples)
+        eval_dataset = Subset(self.dataset, sample_indices)
+        eval_loader = DataLoader(
+            eval_dataset,
+            batch_size=1,
+            shuffle=False,
+            collate_fn=partial(collate_fn_pointnet),
+            num_workers=self.config['training']['num_workers'],
+            pin_memory=True,
+        )
+        per_sample_metrics = {
+            'vertex': {res: [] for res in [128, 256, 512, 1024]},
+            'edge': {res: [] for res in [128, 256, 512, 1024]},
+            'sample_names': []
+        }
+        avg_metrics = {
+            'vertex': {res: defaultdict(list) for res in [128, 256, 512, 1024]},
+            'edge': {res: defaultdict(list) for res in [128, 256, 512, 1024]},
+        }
+        self.vae.eval()
+        for batch_idx, batch_data in enumerate(tqdm(eval_loader, desc="Evaluating")):
+            if batch_data is None:
+                continue
+            sample_idx = sample_indices[batch_idx]
+            sample_name = f'sample_{sample_idx}'
+            per_sample_metrics['sample_names'].append(sample_name)
+            # batch_save_path = f"/root/Trisf/output_slat_flow_matching_active/ckpts/8w/195000_sample_active_vis_42seed_trellis_generate/gt_data_batch_{batch_idx}.pt"
+            # if not os.path.exists(batch_save_path):
+            #     print(f"Warning: Saved batch file not found: {batch_save_path}")
+            #     continue
+            # batch_data = torch.load(batch_save_path, map_location=self.device)
+            with torch.no_grad():
+                # 1. Get input data
+                combined_voxels_1024 = batch_data['combined_voxels_1024'].to(self.device)
+                combined_voxel_labels_1024 = batch_data['combined_voxel_labels_1024'].to(self.device)
+                gt_combined_endpoints_1024 = batch_data['gt_combined_endpoints_1024'].to(self.device)
+                gt_combined_errors_1024 = batch_data['gt_combined_errors_1024'].to(self.device)
+                edge_mask = (combined_voxel_labels_1024 == 1)
+                gt_edge_endpoints_1024 = gt_combined_endpoints_1024[edge_mask].to(self.device)
+                gt_edge_errors_1024 = gt_combined_errors_1024[edge_mask].to(self.device)
+                gt_edge_voxels_1024 = combined_voxels_1024[edge_mask].to(self.device)
+                p1 = gt_edge_endpoints_1024[:, 1:4].float()
+                p2 = gt_edge_endpoints_1024[:, 4:7].float()
+                mask = ( (p1[:,0] < p2[:,0]) |
+                        ((p1[:,0] == p2[:,0]) & (p1[:,1] < p2[:,1])) |
+                        ((p1[:,0] == p2[:,0]) & (p1[:,1] == p2[:,1]) & (p1[:,2] <= p2[:,2])) )
+                pA = torch.where(mask[:, None], p1, p2)  # smaller one
+                pB = torch.where(mask[:, None], p2, p1)  # larger one
+                d = pB - pA
+                dir_gt = F.normalize(d, dim=-1, eps=1e-6)
+                gt_vertex_voxels_1024 = batch_data['gt_vertex_voxels_1024'].to(self.device).int()
+                vtx_128 = downsample_voxels(gt_vertex_voxels_1024, input_resolution=1024, output_resolution=128)
+                edge_128 = downsample_voxels(combined_voxels_1024, input_resolution=1024, output_resolution=128)
+                edge_512 = downsample_voxels(combined_voxels_1024, input_resolution=1024, output_resolution=512)
+                edge_256 = downsample_voxels(combined_voxels_1024, input_resolution=1024, output_resolution=256)
+                edge_1024 = combined_voxels_1024
+                print('vtx_128.shape', vtx_128.shape)
+                print('edge_128.shape', edge_128.shape)
+                gt_edge_voxels_list = [
+                    edge_128,
+                    edge_256,
+                    edge_512,
+                    edge_1024,
+                ]
+                active_coords = batch_data['active_voxels_128'].to(self.device)
+                point_cloud = batch_data['point_cloud_128'].to(self.device)
+                active_voxel_feats = self.voxel_encoder(
+                    p=point_cloud,
+                    sparse_coords=active_coords,
+                    res=128,
+                    bbox_size=(-0.5, 0.5),
+                    # voxel_label=active_labels,
+                )
+                sparse_input = SparseTensor(
+                    feats=active_voxel_feats,
+                    coords=active_coords.int()
+                )
+                latent_128, posterior = self.vae.encode(sparse_input)
+                # load_path = f'/root/Trisf/output_slat_flow_matching_active/ckpts/8w/195000_sample_active_vis_42seed_trellis_generate/sample_latent_{batch_idx}.pt'
+                # latent_128 = torch.load(load_path, map_location=self.device)
+                print('latent_128.feats.mean()', latent_128.feats.mean(), 'latent_128.feats.std()', latent_128.feats.std())
+                print('posterior.mean', posterior.mean.mean(), 'posterior.std', posterior.std.mean(), 'posterior.var', posterior.var.mean())
+                print('latent_128.coords.shape', latent_128.coords.shape)
+                # latent_128 = torch.load(f"/root/Trisf/output_slat_flow_matching/ckpts/1100_chair_sample/110000step_sample/sample_results_samples_{batch_idx}.pt", map_location=self.device)
+                latent_128 = SparseTensor(
+                    coords=latent_128.coords,
+                    feats=latent_128.feats + 0. * torch.randn_like(latent_128.feats),
+                )
+                # self.output_voxel_dir = os.path.dirname(load_path)
+                # self.output_obj_dir = os.path.dirname(load_path)
+                # 7. Decoding with separate vertex and edge processing
+                decoded_results = self.vae.decode(
+                    latent_128,
+                    gt_vertex_voxels_list=[],
+                    gt_edge_voxels_list=[],
+                    training=False,
+                    inference_threshold=0.5,
+                    vis_last_layer=False,
+                )
+                error = 0 #decoded_results[-1]['edge']['predicted_offset_feats']
+                if self.config['model'].get('pred_direction', False):
+                    pred_dir = decoded_results[-1]['edge']['predicted_direction_feats']
+                    zero_mask = (pred_dir == 0).all(dim=1)  # [N]，True 表示这一行全为0
+                    num_zeros = zero_mask.sum().item()
+                    print("Number of zero vectors:", num_zeros)
+                    pred_edge_coords_3d = decoded_results[-1]['edge']['coords']
+                    print('pred_edge_coords_3d.shape', pred_edge_coords_3d.shape)
+                    print('pred_dir.shape', pred_dir.shape)
+                    if pred_edge_coords_3d.shape[-1] == 4:
+                        pred_edge_coords_3d = pred_edge_coords_3d[:, 1:]
+                    # visualize_directions(pred_edge_coords_3d, pred_dir, sample_ratio=0.02)
+                    save_pth = os.path.join(self.output_voxel_dir, f"{sample_name}_direction.ply")
+                    # visualize_colored_points_ply(pred_edge_coords_3d - error / 2. + 0.5, pred_dir, save_pth)
+                    visualize_colored_points_ply(pred_edge_coords_3d, pred_dir, save_pth)
+                    save_pth = os.path.join(self.output_voxel_dir, f"{sample_name}_direction_gt.ply")
+                    # visualize_colored_points_ply((gt_edge_voxels_1024[:, 1:] - gt_edge_errors_1024[:, 1:] + 0.5), dir_gt, save_pth)
+                    visualize_colored_points_ply((gt_edge_voxels_1024[:, 1:]), dir_gt, save_pth)
+                pred_vtx_coords_3d = decoded_results[-1]['vertex']['coords']
+                pred_edge_coords_3d = decoded_results[-1]['edge']['coords']
+                pred_edge_coords_np = np.round(pred_edge_coords_3d.cpu().numpy()).astype(int)
+                gt_vertex_voxels_1024 = batch_data['gt_vertex_voxels_1024'][:, 1:].to(self.device)
+                gt_edge_voxels_1024 = batch_data['gt_edge_voxels_1024'][:, 1:].to(self.device)
+                gt_edge_coords_np = np.round(gt_edge_voxels_1024.cpu().numpy()).astype(int)
+                # Calculate metrics and save results
+                matches, match_rate, pred_total, gt_total = compute_vertex_matching(pred_vtx_coords_3d, gt_vertex_voxels_1024, threshold=threshold,)
+                print(f"\n----- Resolution {1024} vtx -----")
+                print(f"Pred Vertices: {pred_total} | GT Vertices: {gt_total}")
+                print(f"Matched Vertices: {matches} | Match Rate: {match_rate:.2%}")
+                self._save_voxel_ply(pred_vtx_coords_3d / 1024., torch.zeros(len(pred_vtx_coords_3d)), f"{sample_name}_pred_vtx")
+                self._save_voxel_ply((pred_edge_coords_3d) / 1024, torch.zeros(len(pred_edge_coords_3d)), f"{sample_name}_pred_edge")
+                self._save_voxel_ply(gt_vertex_voxels_1024 / 1024, torch.zeros(len(gt_vertex_voxels_1024)), f"{sample_name}_gt_vertex")
+                self._save_voxel_ply((combined_voxels_1024[:, 1:]) / 1024., torch.zeros(len(gt_combined_errors_1024)), f"{sample_name}_gt_edge")
+                # Calculate vertex-specific metrics
+                matches, match_rate, pred_total, gt_total = compute_vertex_matching(pred_edge_coords_3d, combined_voxels_1024[:, 1:], threshold=threshold,)
+                print(f"\n----- Resolution {1024} edge -----")
+                print('pred_edge_coords_3d.shape', pred_edge_coords_3d.shape)
+                print('gt_edge_voxels_1024.shape', gt_edge_voxels_1024.shape)
+                print(f"Pred Vertices: {pred_total} | GT Vertices: {gt_total}")
+                print(f"Matched Vertices: {matches} | Match Rate: {match_rate:.2%}")
+                pred_vertex_coords_np = np.round(pred_vtx_coords_3d.cpu().numpy()).astype(int)
+                pred_edges = []
+                gt_vertex_coords_np = np.round(gt_vertex_voxels_1024.cpu().numpy()).astype(int)
+                if visualize:
+                    if pred_vtx_coords_3d.shape[-1] == 4:
+                        pred_vtx_coords_float = pred_vtx_coords_3d[:, 1:].float()
+                    else:
+                        pred_vtx_coords_float = pred_vtx_coords_3d.float()
+                    pred_vtx_feats = decoded_results[-1]['vertex']['feats']
+                # ==========================================
+                # Link Prediction & Mesh Generation
+                # ==========================================
+                print("Predicting connectivity...")
+                # 1. 预测边
+                # 注意：K_neighbors 的设置。如果是物体，64 足够了。
+                # 如果点非常稀疏，可能需要更大。
+                pred_edges = predict_mesh_connectivity(
+                    connection_head=self.connection_head, # 或者是 self.connection_head，取决于你在哪里定义的
+                    vtx_feats=pred_vtx_feats,
+                    vtx_coords=pred_vtx_coords_float,
+                    batch_size=4096,
+                    threshold=0.5,
+                    k_neighbors=None,
+                    device=self.device
+                )
+                print(f"Predicted {len(pred_edges)} edges.")
+                # 2. 构建三角形
+                num_verts = pred_vtx_coords_float.shape[0]
+                pred_faces = build_triangles_from_edges(pred_edges, num_verts)
+                print(f"Constructed {len(pred_faces)} triangles.")
+                # 3. 保存 OBJ
+                import trimesh
+                # 坐标归一化/还原 (根据你的需求，这里假设你是 0-1024 的体素坐标)
+                # 如果想保存为归一化坐标：
+                mesh_verts = pred_vtx_coords_float.cpu().numpy() / 1024.0
+                # 如果有 error offset，记得加上！
+                # 你之前的代码好像没有对 vertex 加 offset，只对 edge 加了
+                # 如果 vertex 也有 offset (如 dual contouring)，在这里加上
+                # 移动到中心 (可选)
+                mesh_verts = mesh_verts - 0.5
+                mesh = trimesh.Trimesh(vertices=mesh_verts, faces=pred_faces)
+                # 过滤孤立点 (可选)
+                # mesh.remove_unreferenced_vertices()
+                output_obj_path = os.path.join(self.output_voxel_dir, f"{sample_name}_recon.obj")
+                mesh.export(output_obj_path)
+                print(f"Saved mesh to {output_obj_path}")
+                # 保存边线 (用于 Debug)
+                # 有时候三角形很难形成，只看边也很有用
+                edges_path = os.path.join(self.output_voxel_dir, f"{sample_name}_edges.ply")
+                # self._visualize_vertices(pred_edge_coords_np, gt_edge_coords_np, f"{sample_name}_edge_comparison")
+                # Process results at different resolutions
+                for i, res in enumerate([128, 256, 512, 1024]):
+                    if i >= len(decoded_results):
+                        continue
+                    gt_key = f'gt_vertex_voxels_{res}'
+                    if gt_key not in batch_data:
+                        continue
+                    if i == 0:
+                        pred_coords_res = decoded_results[i]['vtx_sp'].coords[:, 1:].float()
+                        gt_coords_res = batch_data[gt_key][:, 1:].float().to(self.device)
+                    else:
+                        pred_coords_res = decoded_results[i]['vertex']['coords'].float()
+                        gt_coords_res = batch_data[gt_key][:, 1:].float().to(self.device)
+                    v_metrics = self._compute_vertex_metrics(pred_coords_res, gt_coords_res, threshold=threshold)
+                    per_sample_metrics['vertex'][res].append({
+                        'recall': v_metrics['recall'],
+                        'precision': v_metrics['precision'],
+                        'f1': v_metrics['f1'],
+                        'num_pred': len(pred_coords_res),
+                        'num_gt': len(gt_coords_res)
+                    })
+                    avg_metrics['vertex'][res]['recall'].append(v_metrics['recall'])
+                    avg_metrics['vertex'][res]['precision'].append(v_metrics['precision'])
+                    avg_metrics['vertex'][res]['f1'].append(v_metrics['f1'])
+                    gt_edge_key = f'gt_edge_voxels_{res}'
+                    if gt_edge_key not in batch_data:
+                        continue
+                    if i == 0:
+                        pred_edge_coords_res = decoded_results[i]['edge_sp'].coords[:, 1:].float()
+                        # gt_edge_coords_res = batch_data[gt_edge_key][:, 1:].float().to(self.device)
+                        idx = i
+                        gt_edge_coords_res = gt_edge_voxels_list[idx][:, 1:].float().to(self.device)
+                    elif i == 3:
+                        idx = i
+                        #################################
+                        # pred_edge_coords_res = decoded_results[i]['edge']['coords'].float() - error / 2. + 0.5
+                        # # gt_edge_coords_res = batch_data[gt_edge_key][:, 1:].float().to(self.device)
+                        # gt_edge_coords_res = gt_edge_voxels_list[idx][:, 1:].float().to(self.device) - gt_combined_errors_1024[:, 1:].to(self.device) + 0.5
+                        pred_edge_coords_res = decoded_results[i]['edge']['coords'].float()
+                        gt_edge_coords_res = gt_edge_voxels_list[idx][:, 1:].float().to(self.device)
+                        # self._save_voxel_ply(gt_edge_voxels_list[idx][:, 1:].float().to(self.device) / (128*2**i), torch.zeros(len(gt_edge_coords_res)), f"{sample_name}_gt_edge_{128*2**i}res_wooffset")
+                        # self._save_voxel_ply(decoded_results[i]['edge']['coords'].float() / (128*2**i), torch.zeros(len(pred_edge_coords_res)), f"{sample_name}_pred_edge_{128*2**i}res_wooffset")
+                    else:
+                        idx = i
+                        pred_edge_coords_res = decoded_results[i]['edge']['coords'].float()
+                        # gt_edge_coords_res = batch_data[gt_edge_key][:, 1:].float().to(self.device)
+                        gt_edge_coords_res = gt_edge_voxels_list[idx][:, 1:].float().to(self.device)
+                    # self._save_voxel_ply(gt_edge_coords_res / (128*2**i), torch.zeros(len(gt_edge_coords_res)), f"{sample_name}_gt_edge_{128*2**i}res")
+                    # self._save_voxel_ply(pred_edge_coords_res / (128*2**i), torch.zeros(len(pred_edge_coords_res)), f"{sample_name}_pred_edge_{128*2**i}res")
+                    e_metrics = self._compute_vertex_metrics(pred_edge_coords_res, gt_edge_coords_res, threshold=threshold)
+                    per_sample_metrics['edge'][res].append({
+                        'recall': e_metrics['recall'],
+                        'precision': e_metrics['precision'],
+                        'f1': e_metrics['f1'],
+                        'num_pred': len(pred_edge_coords_res),
+                        'num_gt': len(gt_edge_coords_res)
+                    })
+                    avg_metrics['edge'][res]['recall'].append(e_metrics['recall'])
+                    avg_metrics['edge'][res]['precision'].append(e_metrics['precision'])
+                    avg_metrics['edge'][res]['f1'].append(e_metrics['f1'])
+        avg_metrics_processed = {}
+        for category, res_dict in avg_metrics.items():
+            avg_metrics_processed[category] = {}
+            for resolution, metric_dict in res_dict.items():
+                avg_metrics_processed[category][resolution] = {
+                    metric_name: np.mean(values) if values else float('nan')
+                    for metric_name, values in metric_dict.items()
+                }
+        result_data = {
+            'config': self.config,
+            'checkpoint': self.ckpt_path,
+            'dataset': self.dataset_path,
+            'num_samples': eval_samples,
+            'per_sample_metrics': per_sample_metrics,
+            'avg_metrics': avg_metrics_processed
+        }
+        results_file_path = os.path.join(self.result_dir, f"evaluation_results_epoch{self.epoch}.yaml")
+        with open(results_file_path, 'w') as f:
+            yaml.dump(result_data, f, default_flow_style=False)
+        return result_data
+    def _generate_line_voxels(
+        self,
+        p1: torch.Tensor,
+        p2: torch.Tensor
+    ) -> Tuple[
+        List[Tuple[int, int, int]],
+        List[Tuple[torch.Tensor, torch.Tensor]],
+        List[np.ndarray]
+    ]:
+        """
+        Improved version using better sampling strategy
+        """
+        p1_np = p1 #.cpu().numpy()
+        p2_np = p2 #.cpu().numpy()
+        voxel_dict = OrderedDict()
+        # Use proper 3D line voxelization algorithm
+        def bresenham_3d(p1, p2):
+            """3D Bresenham's line algorithm"""
+            x1, y1, z1 = np.round(p1).astype(int)
+            x2, y2, z2 = np.round(p2).astype(int)
+            points = []
+            dx = abs(x2 - x1)
+            dy = abs(y2 - y1)
+            dz = abs(z2 - z1)
+            xs = 1 if x2 > x1 else -1
+            ys = 1 if y2 > y1 else -1
+            zs = 1 if z2 > z1 else -1
+            # Driving axis is X
+            if dx >= dy and dx >= dz:
+                err_1 = 2 * dy - dx
+                err_2 = 2 * dz - dx
+                for i in range(dx + 1):
+                    points.append((x1, y1, z1))
+                    if err_1 > 0:
+                        y1 += ys
+                        err_1 -= 2 * dx
+                    if err_2 > 0:
+                        z1 += zs
+                        err_2 -= 2 * dx
+                    err_1 += 2 * dy
+                    err_2 += 2 * dz
+                    x1 += xs
+            # Driving axis is Y
+            elif dy >= dx and dy >= dz:
+                err_1 = 2 * dx - dy
+                err_2 = 2 * dz - dy
+                for i in range(dy + 1):
+                    points.append((x1, y1, z1))
+                    if err_1 > 0:
+                        x1 += xs
+                        err_1 -= 2 * dy
+                    if err_2 > 0:
+                        z1 += zs
+                        err_2 -= 2 * dy
+                    err_1 += 2 * dx
+                    err_2 += 2 * dz
+                    y1 += ys
+            # Driving axis is Z
+            else:
+                err_1 = 2 * dx - dz
+                err_2 = 2 * dy - dz
+                for i in range(dz + 1):
+                    points.append((x1, y1, z1))
+                    if err_1 > 0:
+                        x1 += xs
+                        err_1 -= 2 * dz
+                    if err_2 > 0:
+                        y1 += ys
+                        err_2 -= 2 * dz
+                    err_1 += 2 * dx
+                    err_2 += 2 * dy
+                    z1 += zs
+            return points
+        # Get all voxels using Bresenham algorithm
+        voxel_coords = bresenham_3d(p1_np, p2_np)
+        # Add all voxels to dictionary
+        for coord in voxel_coords:
+            voxel_dict[tuple(coord)] = (p1, p2)
+        voxel_coords = list(voxel_dict.keys())
+        endpoint_pairs = list(voxel_dict.values())
+        # --- compute error vectors ---
+        error_vectors = []
+        diff = p2_np - p1_np
+        d_norm_sq = np.dot(diff, diff)
+        for v in voxel_coords:
+            v_center = np.array(v, dtype=float) + 0.5
+            if d_norm_sq == 0:  # degenerate line
+                closest = p1_np
+            else:
+                t = np.dot(v_center - p1_np, diff) / d_norm_sq
+                t = np.clip(t, 0.0, 1.0)
+                closest = p1_np + t * diff
+            error_vectors.append(v_center - closest)
+        return voxel_coords, endpoint_pairs, error_vectors
+# Usage example
+def set_seed(seed: int):
+    random.seed(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+    if torch.cuda.is_available():
+        torch.cuda.manual_seed(seed)
+        torch.cuda.manual_seed_all(seed)
+    torch.backends.cudnn.deterministic = True
+    torch.backends.cudnn.benchmark = False
+def evaluate_checkpoint(ckpt_path, dataset_path, eval_dir):
+    set_seed(42)
+    tester = Tester(ckpt_path=ckpt_path, dataset_path=dataset_path)
+    result_data = tester.evaluate(num_samples=NUM_SAMPLES, visualize=VISUALIZE, chamfer_threshold=CHAMFER_EDGE_THRESHOLD, threshold=THRESHOLD)
+    # 生成文件名
+    epoch_str = os.path.basename(ckpt_path).split('_')[1].split('.')[0]
+    dataset_name = os.path.basename(os.path.normpath(dataset_path))
+    # 保存简版报告(TXT)
+    summary_path = os.path.join(eval_dir, f"epoch{epoch_str}_{dataset_name}_summary_threshold{THRESHOLD}_one2one.txt")
+    with open(summary_path, 'w') as f:
+        # 头部信息
+        f.write(f"Checkpoint: {os.path.basename(ckpt_path)}\n")
+        f.write(f"Dataset: {dataset_name}\n")
+        f.write(f"Evaluation Samples: {result_data['num_samples']}\n\n")
+        # 平均指标
+        f.write("=== Average Metrics ===\n")
+        for category, data in result_data['avg_metrics'].items():
+            if isinstance(data, dict):  # 处理多分辨率情况
+                f.write(f"\n{category.upper()}:\n")
+                for res, metrics in data.items():
+                    f.write(f"  Resolution {res}:\n")
+                    for k, v in metrics.items():
+                        # 确保值是数字类型后再格式化
+                        if isinstance(v, (int, float)):
+                            f.write(f"    {str(k).ljust(15)}: {v:.4f}\n")
+                        else:
+                            f.write(f"    {str(k).ljust(15)}: {str(v)}\n")
+            else:  # 处理非多分辨率情况
+                f.write(f"\n{category.upper()}:\n")
+                for k, v in data.items():
+                    if isinstance(v, (int, float)):
+                        f.write(f"  {str(k).ljust(15)}: {v:.4f}\n")
+                    else:
+                        f.write(f"  {str(k).ljust(15)}: {str(v)}\n")
+        # 样本级详细统计
+        f.write("\n\n=== Detailed Per-Sample Metrics ===\n")
+        for name, vertex_metrics, edge_metrics in zip(
+            result_data['per_sample_metrics']['sample_names'],
+            zip(*[result_data['per_sample_metrics']['vertex'][res] for res in [128, 256, 512, 1024]]),
+            zip(*[result_data['per_sample_metrics']['edge'][res] for res in [128, 256, 512, 1024]])
+        ):
+            # 样本标题
+            f.write(f"\n◆ Sample: {name}\n")
+            # 顶点指标
+            f.write(f"[Vertex Prediction]\n")
+            f.write(f"  {'Resolution'.ljust(10)} {'Recall'.ljust(8)} {'Precision'.ljust(8)} {'F1'.ljust(8)} {'Pred/Gt'.ljust(10)}\n")
+            for res, metrics in zip([128, 256, 512, 1024], vertex_metrics):
+                f.write(f"  {str(res).ljust(10)} "
+                      f"{metrics['recall']:.4f}    "
+                      f"{metrics['precision']:.4f}    "
+                      f"{metrics['f1']:.4f}    "
+                      f"{metrics['num_pred']}/{metrics['num_gt']}\n")
+            # Edge指标
+            f.write(f"[Edge Prediction]\n")
+            f.write(f"  {'Resolution'.ljust(10)} {'Recall'.ljust(8)} {'Precision'.ljust(8)} {'F1'.ljust(8)} {'Pred/Gt'.ljust(10)}\n")
+            for res, metrics in zip([128, 256, 512, 1024], edge_metrics):
+                f.write(f"  {str(res).ljust(10)} "
+                      f"{metrics['recall']:.4f}    "
+                      f"{metrics['precision']:.4f}    "
+                      f"{metrics['f1']:.4f}    "
+                      f"{metrics['num_pred']}/{metrics['num_gt']}\n")
+            f.write("-"*60 + "\n")
+    print(f"Saved summary to: {summary_path}")
+    return result_data
+if __name__ == '__main__':
+    with torch.cuda.amp.autocast(dtype=torch.bfloat16):
+        evaluate_all_checkpoints = True  # 设置为 True 启用范围过滤
+        EPOCH_START = 14
+        EPOCH_END = 460
+        CHAMFER_EDGE_THRESHOLD=0.5
+        NUM_SAMPLES=50
+        VISUALIZE=True
+        THRESHOLD=1.5
+        VISUAL_FIELD=False
+        ckpt_path = '/gemini/user/private/zhaotianhao/checkpoints/vae/train_9w_200_2000face/shapenet_bs2_128to1024_dir_sorted_dora_head_small_right/checkpoint_epoch14_batch5216_loss0.2745.pt'
+        dataset_path = '/gemini/user/private/zhaotianhao/data/MERGED_DATASET_count_200_2000_100000/test'
+        dataset_path = '/gemini/user/private/zhaotianhao/data/why_filter_unquantized'
+        if dataset_path == '/HOME/paratera_xy/pxy1054/HDD_POOL/Trisf/data/mesh/objaverse_200_2000':
+            RENDERS_DIR = '/HOME/paratera_xy/pxy1054/HDD_POOL/Trisf/data/mesh_render_img/objaverse_200_2000/renders_cond'
+        else:
+            RENDERS_DIR = ''
+        ckpt_dir = os.path.dirname(ckpt_path)
+        eval_dir = os.path.join(ckpt_dir, "evaluate")
+        os.makedirs(eval_dir, exist_ok=True)
+        if False:
+            for i in range(NUM_SAMPLES):
+                print("--- Starting Latent Space PCA Visualization ---")
+                tester = Tester(ckpt_path=ckpt_path, dataset_path=dataset_path)
+                tester.visualize_latent_space_pca(sample_idx=i)
+                print("--- PCA Visualization Finished ---")
+        if not evaluate_all_checkpoints:
+            evaluate_checkpoint(ckpt_path, dataset_path, eval_dir)
+        else:
+            pt_files = sorted([f for f in os.listdir(ckpt_dir) if f.endswith('.pt')])
+            filtered_pt_files = []
+            for f in pt_files:
+                try:
+                    parts = f.split('_')
+                    epoch_str = parts[1].replace('epoch', '')
+                    epoch = int(epoch_str)
+                    if EPOCH_START <= epoch <= EPOCH_END:
+                        filtered_pt_files.append(f)
+                except Exception as e:
+                    print(f"Warning: Could not parse epoch from {f}: {e}")
+                    continue
+            for pt_file in filtered_pt_files:
+                full_ckpt_path = os.path.join(ckpt_dir, pt_file)
+                evaluate_checkpoint(full_ckpt_path, dataset_path, eval_dir)

test_slat_vae_128to1024_pointnet_vae_head_woca.py ADDED Viewed

The diff for this file is too large to render. See raw diff

test_slat_vae_128to256_pointnet_vae_head.py ADDED Viewed

	@@ -0,0 +1,1349 @@

+import os
+import yaml
+import torch
+import numpy as np
+import random
+from tqdm import tqdm
+from collections import defaultdict
+import plotly.graph_objects as go
+from plotly.subplots import make_subplots
+from torch.utils.data import DataLoader, Subset
+from triposf.modules.sparse.basic import SparseTensor
+from triposf.models.triposf_vae.VoxelFeatureVAE_edge_woself_128to1024_decoder_head import VoxelVAE
+from vertex_encoder import VoxelFeatureEncoder_edge, VoxelFeatureEncoder_vtx, VoxelFeatureEncoder_active, VoxelFeatureEncoder_active_pointnet, ConnectionHead
+from utils import load_pretrained_woself
+from dataset_triposf_head import VoxelVertexDataset_edge, collate_fn_pointnet
+from functools import partial
+import itertools
+from typing import List, Tuple, Set
+from collections import OrderedDict
+from scipy.spatial import cKDTree
+from sklearn.neighbors import KDTree
+import trimesh
+import torch
+import torch.nn.functional as F
+import time
+from sklearn.decomposition import PCA
+import matplotlib.pyplot as plt
+import networkx as nx
+def predict_mesh_connectivity(
+    connection_head,
+    vtx_feats,
+    vtx_coords,
+    batch_size=10000,
+    threshold=0.5,
+    k_neighbors=64, # 限制每个点只检测最近的 K 个邻居，设为 -1 则全连接检测
+    device='cuda'
+):
+    """
+    Args:
+        connection_head: 训练好的 MLP 模型
+        vtx_feats: [N, C] 顶点特征
+        vtx_coords: [N, 3] 顶点坐标 (用于 KNN 筛选候选边)
+        batch_size: MLP 推理的 batch size
+        threshold: 判定连接的概率阈值
+        k_neighbors: K-NN 数量。如果是 None 或 -1，则检测所有 N*(N-1)/2 对。
+    """
+    num_verts = vtx_feats.shape[0]
+    if num_verts < 3:
+        return [], [] # 无法构成三角形
+    connection_head.eval()
+    # --- 1. 生成候选边 (Candidate Edges) ---
+    if k_neighbors is not None and k_neighbors > 0 and k_neighbors < num_verts:
+        # 策略 A: 局部 KNN (推荐)
+        # 计算距离矩阵可能会 OOM，使用分块或 KDTree/Faiss，这里用 PyTorch 的 cdist 分块简化版
+        # 或者直接暴力 cdist 如果 N < 10000
+        # 为了简单且高效，这里演示简单的 cdist (注意显存)
+        # 如果 N 很大 (>5000)，建议使用 faiss 或 scipy.spatial.cKDTree
+        dist_mat = torch.cdist(vtx_coords.float(), vtx_coords.float()) # [N, N]
+        # 取 topk (smallest distance)，排除自己
+        # values: [N, K], indices: [N, K]
+        _, indices = torch.topk(dist_mat, k=k_neighbors + 1, dim=1, largest=False)
+        neighbor_indices = indices[:, 1:] # 去掉第一列（自己）
+        # 构建 source, target 索引
+        src = torch.arange(num_verts, device=device).unsqueeze(1).repeat(1, k_neighbors).flatten()
+        dst = neighbor_indices.flatten()
+        # 此时得到的边是双向的 (u->v 和 v->u 可能都存在)，为了效率可以去重
+        # 但为了利用你的 symmetric MLP，保留双向或者只保留 u < v 均可
+        # 这里为了简单，我们生成 u < v 的 mask
+        mask = src < dst
+        u_indices = src[mask]
+        v_indices = dst[mask]
+    else:
+        # 策略 B: 全连接 (O(N^2)) - 仅当 N 较小时使用
+        u_indices, v_indices = torch.triu_indices(num_verts, num_verts, offset=1, device=device)
+    # --- 2. 批量推理 ---
+    all_probs = []
+    num_candidates = u_indices.shape[0]
+    with torch.no_grad():
+        for i in range(0, num_candidates, batch_size):
+            end = min(i + batch_size, num_candidates)
+            batch_u = u_indices[i:end]
+            batch_v = v_indices[i:end]
+            feat_u = vtx_feats[batch_u]
+            feat_v = vtx_feats[batch_v]
+            # Symmetric Forward (和你训练时保持一致)
+            # A -> B
+            input_uv = torch.cat([feat_u, feat_v], dim=-1)
+            logits_uv = connection_head(input_uv)
+            # B -> A
+            input_vu = torch.cat([feat_v, feat_u], dim=-1)
+            logits_vu = connection_head(input_vu)
+            # Sum logits
+            logits = logits_uv + logits_vu
+            probs = torch.sigmoid(logits)
+            all_probs.append(probs)
+    all_probs = torch.cat(all_probs).squeeze() # [M]
+    # --- 3. 筛选连接边 ---
+    connected_mask = all_probs > threshold
+    final_u = u_indices[connected_mask].cpu().numpy()
+    final_v = v_indices[connected_mask].cpu().numpy()
+    edges = np.stack([final_u, final_v], axis=1) # [E, 2]
+    return edges
+def build_triangles_from_edges(edges, num_verts):
+    """
+    从边列表构建三角形。
+    寻找图中所有的 3-Cliques (三元环)。
+    这在图论中是一个经典问题，可以使用 networkx 库。
+    """
+    if len(edges) == 0:
+        return np.empty((0, 3), dtype=int)
+    G = nx.Graph()
+    G.add_nodes_from(range(num_verts))
+    G.add_edges_from(edges)
+    # 寻找所有的 3-cliques (三角形)
+    # enumerate_all_cliques 返回所有大小的 clique，我们需要过滤大小为 3 的
+    # 或者使用 nx.triangles ? 不，那个只返回数量
+    # 使用 nx.enumerate_all_cliques 效率可能较低，对于稀疏图还可以
+    # 更快的方法：迭代每条边 (u, v)，查找 u 和 v 的公共邻居 w
+    triangles = []
+    adj = [set(G.neighbors(n)) for n in range(num_verts)]
+    # 为了避免重复 (u, v, w), (v, w, u)... 我们可以强制 u < v < w
+    # 既然 edges 已经是 u < v (如果我们之前做了 triu)，则只需要找 w > v 且 w in adj[u]
+    # 优化算法：
+    for u, v in edges:
+        if u > v: u, v = v, u # 确保有序
+        # 找公共邻居
+        common = adj[u].intersection(adj[v])
+        for w in common:
+            if w > v: # 强制顺序 u < v < w 防止重复
+                triangles.append([u, v, w])
+    return np.array(triangles)
+def downsample_voxels(
+    voxels: torch.Tensor,
+    input_resolution: int,
+    output_resolution: int
+) -> torch.Tensor:
+    if input_resolution % output_resolution != 0:
+        raise ValueError(f"input_resolution ({input_resolution}) must be divisible "
+                         f"by output_resolution ({output_resolution}).")
+    factor = input_resolution // output_resolution
+    downsampled_voxels = voxels.clone().to(torch.long)
+    downsampled_voxels[:, 1:] //= factor
+    unique_downsampled_voxels = torch.unique(downsampled_voxels, dim=0)
+    return unique_downsampled_voxels
+def visualize_colored_points_ply(coords, vectors, filename):
+    """
+    可视化点云，并用向量方向的颜色来表示，保存为 PLY 文件。
+    Args:
+        coords (torch.Tensor or np.ndarray): 3D坐标，形状为 (N, 3)。
+        vectors (torch.Tensor or np.ndarray): 方向向量，形状为 (N, 3)。
+        filename (str): 保存输出文件的名称，必须是 .ply 格式。
+    """
+    # 确保输入是 numpy 数组
+    if isinstance(coords, torch.Tensor):
+        coords = coords.detach().cpu().numpy()
+    if isinstance(vectors, torch.Tensor):
+        vectors = vectors.detach().cpu().to(torch.float32).numpy()
+    # 检查输入数据是否为空，防止崩溃
+    if coords.size == 0 or vectors.size == 0:
+        print(f"警告：输入数据为空，未生成 {filename} 文件。")
+        return
+    # 将向量分量从 [-1, 1] 映射到 [0, 255]
+    # np.clip 用于将数值限制在 -1 和 1 之间，防止颜色溢出
+    # (vectors + 1) 将范围从 [-1, 1] 移动到 [0, 2]
+    # * 127.5 将范围从 [0, 2] 缩放到 [0, 255]
+    colors = np.clip((vectors + 1) * 127.5, 0, 255).astype(np.uint8)
+    # 创建一个点云对象，并传入颜色信息
+    # trimesh.PointCloud 能够自动处理带颜色的点
+    points = trimesh.points.PointCloud(coords, colors=colors)
+    # 导出为 PLY 文件
+    points.export(filename, file_type='ply')
+    print(f"可视化文件已成功保存为: {filename}")
+def compute_vertex_matching(pred_coords, gt_coords, threshold=1.0):
+    """
+    使用 KDTree 最近邻 + 贪心匹配算法计算顶点匹配 (欧式距离)
+    参数:
+        pred_coords: 预测坐标 (Tensor)
+        gt_coords: 真实坐标 (Tensor)
+        threshold: 匹配误差阈值 (默认1.0)
+    返回:
+        matches: 匹配成功的顶点数量
+        match_rate: 匹配率 (基于真实顶点数)
+        pred_total: 预测顶点总数
+        gt_total: 真实顶点总数
+    """
+    # 转换为整数坐标并去重
+    print('len(pred_coords)', len(pred_coords))
+    pred_array = np.unique(pred_coords.detach().to(torch.float32).cpu().numpy(), axis=0)
+    gt_array = np.unique(gt_coords.detach().cpu().to(torch.float32).numpy(), axis=0)
+    print('len(pred_array)', len(pred_array))
+    pred_total = len(pred_array)
+    gt_total = len(gt_array)
+    # 如果没有点，直接返回
+    if pred_total == 0 or gt_total == 0:
+        return 0, 0.0, pred_total, gt_total
+    # 建立 KDTree（以 gt 为基准）
+    tree = KDTree(gt_array)
+    # 查找预测点到最近的 gt 点
+    dist, indices = tree.query(pred_array, k=1)
+    dist = dist.squeeze()
+    indices = indices.squeeze()
+    # 贪心去重：确保 1 对 1
+    matches = 0
+    used_gt = set()
+    for d, idx in zip(dist, indices):
+        if d <= threshold and idx not in used_gt:
+            matches += 1
+            used_gt.add(idx)
+    match_rate = matches / max(gt_total, 1)
+    return matches, match_rate, pred_total, gt_total
+def flatten_coords_4d(coords_4d: torch.Tensor):
+    coords_4d_long = coords_4d.long()
+    base_x = 256
+    base_y = 256 * 256
+    base_z = 256 * 256 * 256
+    flat_coords = coords_4d_long[:, 0] * base_z + \
+                  coords_4d_long[:, 1] * base_y + \
+                  coords_4d_long[:, 2] * base_x + \
+                  coords_4d_long[:, 3]
+    return flat_coords
+def flatten_coords_3d(coords_3d: torch.Tensor):
+    coords_3d_long = coords_3d #.long()
+    base_x = 256
+    base_y = 256 * 256
+    base_z = 256 * 256 * 256
+    flat_coords = coords_3d_long[:, 0] * base_z + \
+                  coords_3d_long[:, 1] * base_y + \
+                  coords_3d_long[:, 2] * base_x
+    return flat_coords
+class Tester:
+    def __init__(self, ckpt_path, config_path=None, dataset_path=None):
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self.ckpt_path = ckpt_path
+        self.config = self._load_config(config_path)
+        self.dataset_path = dataset_path # or self.config['dataset']['path']
+        checkpoint = torch.load(self.ckpt_path, map_location='cpu')
+        self.epoch = checkpoint.get('epoch', 0)
+        self._init_models()
+        self._init_dataset()
+        self.result_dir = os.path.join(os.path.dirname(ckpt_path), "evaluation_results")
+        os.makedirs(self.result_dir, exist_ok=True)
+        dataset_name_clean = os.path.basename(self.dataset_path).replace('.npz', '').replace('.npy', '')
+        self.output_voxel_dir = os.path.join(os.path.dirname(ckpt_path),
+                                             f"epoch_{self.epoch}_{dataset_name_clean}_voxels_0_gs")
+        os.makedirs(self.output_voxel_dir, exist_ok=True)
+        self.output_obj_dir = os.path.join(os.path.dirname(ckpt_path),
+                                           f"epoch_{self.epoch}_{dataset_name_clean}_obj_0_gs")
+        os.makedirs(self.output_obj_dir, exist_ok=True)
+    def _save_voxel_ply(self, coords: torch.Tensor, labels: torch.Tensor, filename: str):
+        if coords.numel() == 0:
+            return
+        coords_np = coords.cpu().to(torch.float32).numpy()
+        labels_np = labels.cpu().to(torch.float32).numpy()
+        colors = np.zeros((coords_np.shape[0], 3), dtype=np.uint8)
+        colors[labels_np == 0] = [255, 0, 0]
+        colors[labels_np == 1] = [0, 0, 255]
+        try:
+            import trimesh
+            point_cloud = trimesh.PointCloud(vertices=coords_np, colors=colors)
+            ply_path = os.path.join(self.output_voxel_dir, f"{filename}.ply")
+            point_cloud.export(ply_path)
+        except ImportError:
+            ply_path = os.path.join(self.output_voxel_dir, f"{filename}.ply")
+            with open(ply_path, 'w') as f:
+                f.write("ply\n")
+                f.write("format ascii 1.0\n")
+                f.write(f"element vertex {coords_np.shape[0]}\n")
+                f.write("property float x\n")
+                f.write("property float y\n")
+                f.write("property float z\n")
+                f.write("property uchar red\n")
+                f.write("property uchar green\n")
+                f.write("property uchar blue\n")
+                f.write("end_header\n")
+                for i in range(coords_np.shape[0]):
+                    f.write(f"{coords_np[i,0]} {coords_np[i,1]} {coords_np[i,2]} {colors[i,0]} {colors[i,1]} {colors[i,2]}\n")
+    def _load_config(self, config_path=None):
+        if config_path and os.path.exists(config_path):
+            with open(config_path) as f:
+                return yaml.safe_load(f)
+        ckpt_dir = os.path.dirname(self.ckpt_path)
+        possible_configs = [
+            os.path.join(ckpt_dir, "config.yaml"),
+            os.path.join(os.path.dirname(ckpt_dir), "config.yaml")
+        ]
+        for config_file in possible_configs:
+            if os.path.exists(config_file):
+                with open(config_file) as f:
+                    print(f"Loaded config from: {config_file}")
+                    return yaml.safe_load(f)
+        checkpoint = torch.load(self.ckpt_path, map_location='cpu')
+        if 'config' in checkpoint:
+            print("Loaded config from checkpoint")
+            return checkpoint['config']
+        raise FileNotFoundError("Could not find config_edge.yaml in checkpoint directory or parent, and config not saved in checkpoint.")
+    def _init_models(self):
+        """
+        Build the voxel encoder, connection head and VAE, load their weights from
+        ``self.ckpt_path`` and switch all three to eval mode.
+        Raises:
+            ValueError: if any loaded parameter contains NaN or Inf.
+        """
+        # Encoder and connection head use fixed hyper-parameters; only the VAE reads the config.
+        self.voxel_encoder = VoxelFeatureEncoder_active_pointnet(
+            in_channels=15,
+            hidden_dim=256,
+            out_channels=1024,
+            scatter_type='mean',
+            n_blocks=5,
+            resolution=128,
+        ).to(self.device)
+        self.connection_head = ConnectionHead(
+            channels=128 * 2,
+            out_channels=1,
+            mlp_ratio=4,
+        ).to(self.device)
+        self.vae = VoxelVAE( # ablation: VoxelVAE_1volume_dilation
+            in_channels=self.config['model']['in_channels'],
+            latent_dim=self.config['model']['latent_dim'],
+            encoder_blocks=self.config['model']['encoder_blocks'],
+            # decoder_blocks=self.config['model']['decoder_blocks'],
+            decoder_blocks_vtx=self.config['model']['decoder_blocks_vtx'],
+            decoder_blocks_edge=self.config['model']['decoder_blocks_edge'],
+            num_heads=8,
+            num_head_channels=64,
+            mlp_ratio=4.0,
+            attn_mode="swin",
+            window_size=8,
+            pe_mode="ape",
+            use_fp16=False,
+            use_checkpoint=False,
+            qk_rms_norm=False,
+            using_subdivide=True,
+            using_attn=self.config['model']['using_attn'],
+            attn_first=self.config['model'].get('attn_first', True),
+            pred_direction=self.config['model'].get('pred_direction', False),
+        ).to(self.device)
+        load_pretrained_woself(
+            checkpoint_path=self.ckpt_path,
+            voxel_encoder=self.voxel_encoder,
+            connection_head=self.connection_head,
+            vae=self.vae,
+        )
+        # --- Weight sanity check: scan every model for NaN/Inf parameters ---
+        print(f"--- 正在检查权重文件中的 NaN/Inf 值... ---")
+        has_nan_inf = False
+        # Each model is checked separately (no short-circuit) so all offending parameters get reported.
+        if self._check_weights_for_nan_inf(self.vae, "VoxelVAE"):
+            has_nan_inf = True
+        if self._check_weights_for_nan_inf(self.voxel_encoder, "Vertex Encoder"):
+            has_nan_inf = True
+        if self._check_weights_for_nan_inf(self.connection_head, "Connection Head"):
+            has_nan_inf = True
+        if not has_nan_inf:
+            print("--- 权重检查通过。未发现 NaN/Inf 值。 ---")
+        else:
+            # Bad values found: abort, since evaluation cannot continue with them.
+            raise ValueError(f"在检查点 '{self.ckpt_path}' 中发现了 NaN 或 Inf 值。请检查导致训练不稳定的权重文件。")
+        # --- End of weight sanity check ---
+        self.vae.eval()
+        self.voxel_encoder.eval()
+        self.connection_head.eval()
+    def _init_dataset(self):
+        """
+        Create ``self.dataset`` from ``self.dataset_path`` and the ``dataset`` section
+        of the config, plus ``self.dataloader`` (batch size 1, no shuffling, no workers).
+        """
+        self.dataset = VoxelVertexDataset_edge(
+            root_dir=self.dataset_path,
+            base_resolution=self.config['dataset']['base_resolution'],
+            min_resolution=self.config['dataset']['min_resolution'],
+            # cache_dir='./dataset_cache/test_15c_dora',
+            cache_dir=self.config['dataset']['cache_dir'],
+            renders_dir=self.config['dataset']['renders_dir'],
+            # filter_active_voxels=self.config['dataset']['filter_active_voxels'],
+            # Active-voxel filtering is forced off here; the config value above is ignored.
+            filter_active_voxels=False,
+            cache_filter_path=self.config['dataset']['cache_filter_path'],
+            sample_type=self.config['dataset']['sample_type'],
+            active_voxel_res=128,
+            pc_sample_number=819200,
+        )
+        self.dataloader = DataLoader(
+            self.dataset,
+            batch_size=1,
+            shuffle=False,
+            # partial() binds no arguments here, so this is collate_fn_pointnet itself.
+            collate_fn=partial(collate_fn_pointnet),
+            num_workers=0,
+            pin_memory=True,
+            # prefetch_factor=4,
+        )
+    def _check_weights_for_nan_inf(self, model: torch.nn.Module, model_name: str) -> bool:
+        """
+        检查模型的所有参数中是否存在 NaN 或 Inf 值。
+        Args:
+            model (torch.nn.Module): 要检查的模型。
+            model_name (str): 模型的名称，用于打印日志。
+        Returns:
+            bool: 如果找到 NaN 或 Inf，则返回 True，否则返回 False。
+        """
+        found_issue = False
+        for name, param in model.named_parameters():
+            if torch.isnan(param.data).any():
+                print(f"[!!!] 严重错误: 在模型 '{model_name}' 的参数 '{name}' 中发现 NaN 值！")
+                found_issue = True
+            if torch.isinf(param.data).any():
+                print(f"[!!!] 严重错误: 在模型 '{model_name}' 的参数 '{name}' 中发现 Inf 值！")
+                found_issue = True
+        return found_issue
+    def _compute_vertex_metrics(self, pred_coords, gt_coords, threshold=1.0):
+        """
+        Vertex recall / precision / F1 with a greedy one-to-one matching that
+        prefers the closest pairs.
+        NOTE(review): this class defines another method with the same name directly
+        after this one; the later definition replaces this one when the class body is
+        executed, so this version is never called as written.
+        Returns:
+            dict with keys 'recall', 'precision', 'f1', 'matches', 'pred_count', 'gt_count'.
+        """
+        # Round to integer coordinates and de-duplicate both point sets.
+        pred_array = np.unique(pred_coords.round().int().cpu().numpy(), axis=0)
+        gt_array = np.unique(gt_coords.round().int().cpu().numpy(), axis=0)
+        pred_total = len(pred_array)
+        gt_total = len(gt_array)
+        if pred_total == 0 or gt_total == 0:
+            return {
+                'recall': 0.0,
+                'precision': 0.0,
+                'f1': 0.0,
+                'matches': 0,
+                'pred_count': pred_total,
+                'gt_count': gt_total
+            }
+        # Build the KD-tree on the predictions and look up the nearest prediction for every GT point.
+        tree = cKDTree(pred_array)
+        dists, pred_idxs = tree.query(gt_array, k=1)
+        # --- Matching ---
+        # 1. Collect (distance, gt index, pred index) for every GT point whose nearest
+        #    prediction is within the threshold, so candidates can be ordered by distance.
+        possible_matches = []
+        for gt_idx, (dist, pred_idx) in enumerate(zip(dists, pred_idxs)):
+            if dist <= threshold:
+                possible_matches.append((dist, gt_idx, pred_idx))
+        # 2. Sort by increasing distance (greedy strategy).
+        possible_matches.sort(key=lambda x: x[0])
+        matches = 0
+        # Track used predictions and GT points to keep the assignment one-to-one.
+        used_pred_indices = set()
+        used_gt_indices = set() # each GT index occurs once in possible_matches; kept as an extra guard
+        # 3. Walk the sorted candidates and accept a pair only if both ends are still free.
+        for dist, gt_idx, pred_idx in possible_matches:
+            if pred_idx not in used_pred_indices and gt_idx not in used_gt_indices:
+                matches += 1
+                used_pred_indices.add(pred_idx)
+                used_gt_indices.add(gt_idx)
+        # --- End of matching ---
+        # matches is the number of true positives; it cannot exceed pred_total or gt_total.
+        recall = matches / gt_total if gt_total > 0 else 0.0
+        precision = matches / pred_total if pred_total > 0 else 0.0
+        # F1 as the harmonic mean of precision and recall.
+        if (precision + recall) == 0:
+            f1 = 0.0
+        else:
+            f1 = 2 * (precision * recall) / (precision + recall)
+        return {
+            'recall': recall,
+            'precision': precision,
+            'f1': f1,
+            'matches': matches,
+            'pred_count': pred_total,
+            'gt_count': gt_total
+        }
+    def _compute_vertex_metrics(self, pred_coords, gt_coords, threshold=1.0):
+        """
+        一个折衷的顶点指标计算方案。
+        它沿用“为每个真实点寻找最近预测点”的逻辑，
+        但通过修正计算方式，确保Precision和F1值不会超过1.0。
+        """
+        # 假设 pred_coords 和 gt_coords 是 PyTorch 张量
+        pred_array = np.unique(pred_coords.round().int().cpu().numpy(), axis=0)
+        gt_array = np.unique(gt_coords.round().int().cpu().numpy(), axis=0)
+        pred_total = len(pred_array)
+        gt_total = len(gt_array)
+        if pred_total == 0 or gt_total == 0:
+            return {
+                'recall': 0.0,
+                'precision': 0.0,
+                'f1': 0.0,
+                'matches': 0,
+                'pred_count': pred_total,
+                'gt_count': gt_total
+            }
+        # 在预测点上构建KD-Tree，为每个真实点查找最近的预测点
+        tree = cKDTree(pred_array)
+        dists, _ = tree.query(gt_array, k=1) # 我们在这里其实不需要 pred 的索引
+        # 1. 计算从 gt 角度出发的匹配数 (True Positives for Recall)
+        #    这和您的第一个函数完全一样。
+        #    这个值代表了“有多少个真实点被成功找到了”。
+        matches_from_gt = np.sum(dists <= threshold)
+        # 2. 计算 Recall (召回率)
+        #    召回率的分母是真实点的总数，所以这里的计算是合理的。
+        recall = matches_from_gt / gt_total if gt_total > 0 else 0.0
+        # 3. 计算 Precision (精确率) - ✅ 这是核心修正点
+        #    精确率的分母是预测点的总数。
+        #    分子（True Positives）不能超过预测点的总数。
+        #    因此，我们取 matches_from_gt 和 pred_total 中的较小值。
+        #    这解决了 Precision > 1 的问题。
+        tp_for_precision = min(matches_from_gt, pred_total)
+        precision = tp_for_precision / pred_total if pred_total > 0 else 0.0
+        # 4. 使用标准的F1分数公式
+        #    您原来的 F1 公式 `2 * matches / (pred + gt)` 是 L1-Score，
+        #    更常用的是基于 Precision 和 Recall 的调和平均数。
+        if (precision + recall) == 0:
+            f1 = 0.0
+        else:
+            f1 = 2 * (precision * recall) / (precision + recall)
+        return {
+            'recall': recall,
+            'precision': precision,
+            'f1': f1,
+            'matches': matches_from_gt, # 仍然报告原始的匹配数，便于观察
+            'pred_count': pred_total,
+            'gt_count': gt_total
+        }
+    def _compute_chamfer_distance(self, p1: torch.Tensor, p2: torch.Tensor, one_sided: bool = False):
+        if len(p1) == 0 or len(p2) == 0:
+            return float('nan')
+        dist_p1_p2 = torch.min(torch.cdist(p1, p2), dim=1)[0].mean()
+        if one_sided:
+            return dist_p1_p2.item()
+        else:
+            dist_p2_p1 = torch.min(torch.cdist(p2, p1), dim=1)[0].mean()
+            return (dist_p1_p2 + dist_p2_p1).item() / 2
+    def visualize_latent_space_pca(self, sample_idx: int):
+        """
+        Encodes a sample, performs PCA on its latent features, and saves a
+        colored PLY file for visualization.
+        The position of each point in the PLY file corresponds to the spatial
+        location in the latent grid.
+        The color of each point represents the first three principal components
+        of its feature vector.
+        """
+        print(f"--- Starting Latent Space PCA Visualization for Sample {sample_idx} ---")
+        self.vae.eval()
+        try:
+            # 1. Get the latent representation for the sample
+            latent = self._get_latent_for_sample(sample_idx)
+        except ValueError as e:
+            print(f"Error: {e}")
+            return
+        latent_coords = latent.coords.detach().cpu().numpy()
+        latent_feats = latent.feats.detach().cpu().numpy()
+        if latent_feats.shape[0] < 3:
+            print(f"Warning: Not enough latent points ({latent_feats.shape[0]}) to perform PCA. Skipping.")
+            return
+        print(f"--> Performing PCA on {latent_feats.shape[0]} latent vectors of dimension {latent_feats.shape[1]}...")
+        # 2. Perform PCA to reduce feature dimensions to 3
+        pca = PCA(n_components=3)
+        pca_features = pca.fit_transform(latent_feats)
+        print(f"    Explained variance ratio by 3 components: {pca.explained_variance_ratio_}")
+        print(f"    Total explained variance: {np.sum(pca.explained_variance_ratio_):.4f}")
+        # 3. Normalize the PCA components to be used as RGB colors [0, 255]
+        # We normalize each component independently to maximize color contrast
+        normalized_colors = np.zeros_like(pca_features)
+        for i in range(3):
+            min_val = pca_features[:, i].min()
+            max_val = pca_features[:, i].max()
+            if max_val - min_val > 1e-6:
+                normalized_colors[:, i] = (pca_features[:, i] - min_val) / (max_val - min_val)
+            else:
+                normalized_colors[:, i] = 0.5 # Handle case of constant value
+        colors_uint8 = (normalized_colors * 255).astype(np.uint8)
+        # 4. Prepare spatial coordinates for the point cloud
+        # latent_coords is (batch_idx, x, y, z), we want the xyz part
+        spatial_coords = latent_coords[:, 1:]
+        # 5. Create and save the colored PLY file
+        try:
+            # Create a Trimesh PointCloud object
+            point_cloud = trimesh.points.PointCloud(vertices=spatial_coords, colors=colors_uint8)
+            # Define the output filename
+            filename = f"sample_{sample_idx}_latent_pca.ply"
+            ply_path = os.path.join(self.output_voxel_dir, filename)
+            # Export the file
+            point_cloud.export(ply_path)
+            print(f"--> Successfully saved PCA visualization to: {ply_path}")
+        except Exception as e:
+            print(f"Error during Trimesh export: {e}")
+            print("Please ensure 'trimesh' is installed correctly.")
+    def _get_latent_for_sample(self, sample_idx: int) -> SparseTensor:
+        """
+        Encodes a single sample and returns its latent representation.
+        """
+        print(f"--> Encoding sample {sample_idx} to get its latent vector...")
+        # Get data for the specified sample
+        batch_data = self.dataset[sample_idx]
+        if batch_data is None:
+            raise ValueError(f"Sample at index {sample_idx} could not be loaded.")
+        # Use the collate function to form a batch
+        batch_data = collate_fn_pointnet([batch_data])
+        with torch.no_grad():
+            # 1. Get input data and move to device
+            gt_vertex_voxels_256 = batch_data['gt_vertex_voxels_256'].to(self.device)
+            combined_voxels_256 = batch_data['combined_voxels_256'].to(self.device)
+            active_coords = batch_data['active_voxels_128'].to(self.device)
+            point_cloud = batch_data['point_cloud_128'].to(self.device)
+            vtx_128 = downsample_voxels(gt_vertex_voxels_256, input_resolution=256, output_resolution=128)
+            edge_128 = downsample_voxels(combined_voxels_256, input_resolution=256, output_resolution=128)
+            active_voxel_feats = self.voxel_encoder(
+                p=point_cloud,
+                sparse_coords=active_coords,
+                res=128,
+                bbox_size=(-0.5, 0.5),
+                # voxel_label=active_labels,
+            )
+            sparse_input = SparseTensor(
+                feats=active_voxel_feats,
+                coords=active_coords.int()
+            )
+            # 2. Encode to get the latent representation
+            latent_128, posterior = self.vae.encode(sparse_input, sample_posterior=True,)
+            print(f"    Latent for sample {sample_idx} obtained. Shape: {latent_128.feats.shape}")
+            return latent_128
+    def evaluate(self, num_samples=None, visualize=False, chamfer_threshold=0.9, threshold=1.):
+        total_samples = len(self.dataset)
+        eval_samples = min(num_samples or total_samples, total_samples)
+        # sample_indices = random.sample(range(total_samples), eval_samples) if num_samples else range(total_samples)
+        sample_indices = range(eval_samples)
+        eval_dataset = Subset(self.dataset, sample_indices)
+        eval_loader = DataLoader(
+            eval_dataset,
+            batch_size=1,
+            shuffle=False,
+            collate_fn=partial(collate_fn_pointnet),
+            num_workers=self.config['training']['num_workers'],
+            pin_memory=True,
+        )
+        per_sample_metrics = {
+            'vertex': {res: [] for res in [128, 256]},
+            'edge': {res: [] for res in [128, 256]},
+            'sample_names': []
+        }
+        avg_metrics = {
+            'vertex': {res: defaultdict(list) for res in [128, 256]},
+            'edge': {res: defaultdict(list) for res in [128, 256]},
+        }
+        self.vae.eval()
+        for batch_idx, batch_data in enumerate(tqdm(eval_loader, desc="Evaluating")):
+            if batch_data is None:
+                continue
+            sample_idx = sample_indices[batch_idx]
+            sample_name = f'sample_{sample_idx}'
+            per_sample_metrics['sample_names'].append(sample_name)
+            # batch_save_path = f"/gemini/user/private/zhaotianhao/checkpoints/output_slat_flow_matching_active/8w_128to256_head_rope/215000_sample_active_vis_42seed_1000complex/gt_data_batch_{batch_idx}.pt"
+            # if not os.path.exists(batch_save_path):
+            #     print(f"Warning: Saved batch file not found: {batch_save_path}")
+            #     continue
+            # batch_data = torch.load(batch_save_path, map_location=self.device)
+            with torch.no_grad():
+                # 1. Get input data
+                combined_voxels_256 = batch_data['combined_voxels_256'].to(self.device)
+                combined_voxel_labels_256 = batch_data['combined_voxel_labels_256'].to(self.device)
+                gt_combined_endpoints_256 = batch_data['gt_combined_endpoints_256'].to(self.device)
+                gt_combined_errors_256 = batch_data['gt_combined_errors_256'].to(self.device)
+                edge_mask = (combined_voxel_labels_256 == 1)
+                gt_edge_endpoints_256 = gt_combined_endpoints_256[edge_mask].to(self.device)
+                gt_edge_errors_256 = gt_combined_errors_256[edge_mask].to(self.device)
+                gt_edge_voxels_256 = combined_voxels_256[edge_mask].to(self.device)
+                p1 = gt_edge_endpoints_256[:, 1:4].float()
+                p2 = gt_edge_endpoints_256[:, 4:7].float()
+                mask = ( (p1[:,0] < p2[:,0]) |
+                        ((p1[:,0] == p2[:,0]) & (p1[:,1] < p2[:,1])) |
+                        ((p1[:,0] == p2[:,0]) & (p1[:,1] == p2[:,1]) & (p1[:,2] <= p2[:,2])) )
+                pA = torch.where(mask[:, None], p1, p2)  # smaller one
+                pB = torch.where(mask[:, None], p2, p1)  # larger one
+                d = pB - pA
+                dir_gt = F.normalize(d, dim=-1, eps=1e-6)
+                gt_vertex_voxels_256 = batch_data['gt_vertex_voxels_256'].to(self.device).int()
+                vtx_128 = downsample_voxels(gt_vertex_voxels_256, input_resolution=256, output_resolution=128)
+                edge_128 = downsample_voxels(combined_voxels_256, input_resolution=256, output_resolution=128)
+                edge_256 = combined_voxels_256
+                print('vtx_128.shape', vtx_128.shape)
+                print('edge_128.shape', edge_128.shape)
+                gt_edge_voxels_list = [
+                    edge_128,
+                    edge_256,
+                ]
+                active_coords = batch_data['active_voxels_128'].to(self.device)
+                point_cloud = batch_data['point_cloud_128'].to(self.device)
+                active_voxel_feats = self.voxel_encoder(
+                    p=point_cloud,
+                    sparse_coords=active_coords,
+                    res=128,
+                    bbox_size=(-0.5, 0.5),
+                    # voxel_label=active_labels,
+                )
+                sparse_input = SparseTensor(
+                    feats=active_voxel_feats,
+                    coords=active_coords.int()
+                )
+                latent_128, posterior = self.vae.encode(sparse_input)
+                # load_path = f'/gemini/user/private/zhaotianhao/checkpoints/output_slat_flow_matching_active/8w_128to256_head_rope/215000_sample_active_vis_42seed_1000complex/sample_latent_{batch_idx}.pt'
+                # latent_128 = torch.load(load_path, map_location=self.device)
+                print('latent_128.feats.mean()', latent_128.feats.mean(), 'latent_128.feats.std()', latent_128.feats.std())
+                print('posterior.mean', posterior.mean.mean(), 'posterior.std', posterior.std.mean(), 'posterior.var', posterior.var.mean())
+                print('latent_128.coords.shape', latent_128.coords.shape)
+                # latent_128 = torch.load(f"/root/Trisf/output_slat_flow_matching/ckpts/1100_chair_sample/110000step_sample/sample_results_samples_{batch_idx}.pt", map_location=self.device)
+                latent_128 = SparseTensor(
+                    coords=latent_128.coords,
+                    feats=latent_128.feats + 0. * torch.randn_like(latent_128.feats),
+                )
+                # self.output_voxel_dir = os.path.dirname(load_path)
+                # self.output_obj_dir = os.path.dirname(load_path)
+                # 7. Decoding with separate vertex and edge processing
+                decoded_results = self.vae.decode(
+                    latent_128,
+                    gt_vertex_voxels_list=[],
+                    gt_edge_voxels_list=[],
+                    training=False,
+                    inference_threshold=0.5,
+                    vis_last_layer=False,
+                )
+                error = 0 # decoded_results[-1]['edge']['predicted_offset_feats']
+                if self.config['model'].get('pred_direction', False):
+                    pred_dir = decoded_results[-1]['edge']['predicted_direction_feats']
+                    zero_mask = (pred_dir == 0).all(dim=1)  # [N]，True 表示这一行全为0
+                    num_zeros = zero_mask.sum().item()
+                    print("Number of zero vectors:", num_zeros)
+                    pred_edge_coords_3d = decoded_results[-1]['edge']['coords']
+                    print('pred_edge_coords_3d.shape', pred_edge_coords_3d.shape)
+                    print('pred_dir.shape', pred_dir.shape)
+                    if pred_edge_coords_3d.shape[-1] == 4:
+                        pred_edge_coords_3d = pred_edge_coords_3d[:, 1:]
+                    # visualize_directions(pred_edge_coords_3d, pred_dir, sample_ratio=0.02)
+                    save_pth = os.path.join(self.output_voxel_dir, f"{sample_name}_direction.ply")
+                    # visualize_colored_points_ply(pred_edge_coords_3d - error / 2. + 0.5, pred_dir, save_pth)
+                    visualize_colored_points_ply(pred_edge_coords_3d, pred_dir, save_pth)
+                    save_pth = os.path.join(self.output_voxel_dir, f"{sample_name}_direction_gt.ply")
+                    # visualize_colored_points_ply((gt_edge_voxels_256[:, 1:] - gt_edge_errors_256[:, 1:] + 0.5), dir_gt, save_pth)
+                    visualize_colored_points_ply((gt_edge_voxels_256[:, 1:]), dir_gt, save_pth)
+                pred_vtx_coords_3d = decoded_results[-1]['vertex']['coords']
+                pred_edge_coords_3d = decoded_results[-1]['edge']['coords']
+                pred_edge_coords_np = np.round(pred_edge_coords_3d.cpu().numpy()).astype(int)
+                gt_vertex_voxels_256 = batch_data['gt_vertex_voxels_256'][:, 1:].to(self.device)
+                gt_edge_voxels_256 = batch_data['gt_edge_voxels_256'][:, 1:].to(self.device)
+                gt_edge_coords_np = np.round(gt_edge_voxels_256.cpu().numpy()).astype(int)
+                # Calculate metrics and save results
+                matches, match_rate, pred_total, gt_total = compute_vertex_matching(pred_vtx_coords_3d, gt_vertex_voxels_256, threshold=threshold,)
+                print(f"\n----- Resolution {256} vtx -----")
+                print(f"Pred Vertices: {pred_total} | GT Vertices: {gt_total}")
+                print(f"Matched Vertices: {matches} | Match Rate: {match_rate:.2%}")
+                self._save_voxel_ply(pred_vtx_coords_3d / 256., torch.zeros(len(pred_vtx_coords_3d)), f"{sample_name}_pred_vtx")
+                self._save_voxel_ply((pred_edge_coords_3d - error / 2. + 0.5) / 256, torch.zeros(len(pred_edge_coords_3d)), f"{sample_name}_pred_edge")
+                self._save_voxel_ply(gt_vertex_voxels_256 / 256, torch.zeros(len(gt_vertex_voxels_256)), f"{sample_name}_gt_vertex")
+                self._save_voxel_ply((combined_voxels_256[:, 1:] - gt_combined_errors_256[:, 1:] + 0.5) / 256., torch.zeros(len(gt_combined_errors_256)), f"{sample_name}_gt_edge")
+                # Calculate vertex-specific metrics
+                matches, match_rate, pred_total, gt_total = compute_vertex_matching(pred_edge_coords_3d, combined_voxels_256[:, 1:], threshold=threshold,)
+                print(f"\n----- Resolution {256} edge -----")
+                print('pred_edge_coords_3d.shape', pred_edge_coords_3d.shape)
+                print('gt_edge_voxels_256.shape', gt_edge_voxels_256.shape)
+                print(f"Pred Vertices: {pred_total} | GT Vertices: {gt_total}")
+                print(f"Matched Vertices: {matches} | Match Rate: {match_rate:.2%}")
+                pred_vertex_coords_np = np.round(pred_vtx_coords_3d.cpu().numpy()).astype(int)
+                pred_edges = []
+                gt_vertex_coords_np = np.round(gt_vertex_voxels_256.cpu().numpy()).astype(int)
+                if visualize:
+                    if pred_vtx_coords_3d.shape[-1] == 4:
+                        pred_vtx_coords_float = pred_vtx_coords_3d[:, 1:].float()
+                    else:
+                        pred_vtx_coords_float = pred_vtx_coords_3d.float()
+                    pred_vtx_feats = decoded_results[-1]['vertex']['feats']
+                # ==========================================
+                # Link Prediction & Mesh Generation
+                # ==========================================
+                print("Predicting connectivity...")
+                # 1. 预测边
+                # 注意：K_neighbors 的设置。如果是物体，64 足够了。
+                # 如果点非常稀疏，可能需要更大。
+                pred_edges = predict_mesh_connectivity(
+                    connection_head=self.connection_head, # 或者是 self.connection_head，取决于你在哪里定义的
+                    vtx_feats=pred_vtx_feats,
+                    vtx_coords=pred_vtx_coords_float,
+                    batch_size=4096,
+                    threshold=0.5,
+                    k_neighbors=None,
+                    device=self.device
+                )
+                print(f"Predicted {len(pred_edges)} edges.")
+                # 2. 构建三角形
+                num_verts = pred_vtx_coords_float.shape[0]
+                pred_faces = build_triangles_from_edges(pred_edges, num_verts)
+                print(f"Constructed {len(pred_faces)} triangles.")
+                # 3. 保存 OBJ
+                import trimesh
+                # 坐标归一化/还原 (根据你的需求，这里假设你是 0-256 的体素坐标)
+                # 如果想保存为归一化坐标：
+                mesh_verts = pred_vtx_coords_float.cpu().numpy() / 256.0
+                # 如果有 error offset，记得加上！
+                # 你之前的代码好像没有对 vertex 加 offset，只对 edge 加了
+                # 如果 vertex 也有 offset (如 dual contouring)，在这里加上
+                # 移动到中心 (可选)
+                mesh_verts = mesh_verts - 0.5
+                mesh = trimesh.Trimesh(vertices=mesh_verts, faces=pred_faces)
+                # 过滤孤立点 (可选)
+                # mesh.remove_unreferenced_vertices()
+                output_obj_path = os.path.join(self.output_voxel_dir, f"{sample_name}_recon.obj")
+                mesh.export(output_obj_path)
+                print(f"Saved mesh to {output_obj_path}")
+                # 保存边线 (用于 Debug)
+                # 有时候三角形很难形成，只看边也很有用
+                edges_path = os.path.join(self.output_voxel_dir, f"{sample_name}_edges.ply")
+                # self._visualize_vertices(pred_edge_coords_np, gt_edge_coords_np, f"{sample_name}_edge_comparison")
+                # Process results at different resolutions
+                for i, res in enumerate([128, 256]):
+                    if i >= len(decoded_results):
+                        continue
+                    gt_key = f'gt_vertex_voxels_{res}'
+                    if gt_key not in batch_data:
+                        continue
+                    if i == 0:
+                        pred_coords_res = decoded_results[i]['vtx_sp'].coords[:, 1:].float()
+                        gt_coords_res = batch_data[gt_key][:, 1:].float().to(self.device)
+                    else:
+                        pred_coords_res = decoded_results[i]['vertex']['coords'].float()
+                        gt_coords_res = batch_data[gt_key][:, 1:].float().to(self.device)
+                    v_metrics = self._compute_vertex_metrics(pred_coords_res, gt_coords_res, threshold=threshold)
+                    per_sample_metrics['vertex'][res].append({
+                        'recall': v_metrics['recall'],
+                        'precision': v_metrics['precision'],
+                        'f1': v_metrics['f1'],
+                        'num_pred': len(pred_coords_res),
+                        'num_gt': len(gt_coords_res)
+                    })
+                    avg_metrics['vertex'][res]['recall'].append(v_metrics['recall'])
+                    avg_metrics['vertex'][res]['precision'].append(v_metrics['precision'])
+                    avg_metrics['vertex'][res]['f1'].append(v_metrics['f1'])
+                    gt_edge_key = f'gt_edge_voxels_{res}'
+                    if gt_edge_key not in batch_data:
+                        continue
+                    if i == 0:
+                        pred_edge_coords_res = decoded_results[i]['edge_sp'].coords[:, 1:].float()
+                        # gt_edge_coords_res = batch_data[gt_edge_key][:, 1:].float().to(self.device)
+                        idx = i
+                        gt_edge_coords_res = gt_edge_voxels_list[idx][:, 1:].float().to(self.device)
+                    elif i == 1:
+                        idx = i
+                        #################################
+                        # pred_edge_coords_res = decoded_results[i]['edge']['coords'].float() - error / 2. + 0.5
+                        # # gt_edge_coords_res = batch_data[gt_edge_key][:, 1:].float().to(self.device)
+                        # gt_edge_coords_res = gt_edge_voxels_list[idx][:, 1:].float().to(self.device) - gt_combined_errors_256[:, 1:].to(self.device) + 0.5
+                        pred_edge_coords_res = decoded_results[i]['edge']['coords'].float()
+                        gt_edge_coords_res = gt_edge_voxels_list[idx][:, 1:].float().to(self.device)
+                        # self._save_voxel_ply(gt_edge_voxels_list[idx][:, 1:].float().to(self.device) / (128*2**i), torch.zeros(len(gt_edge_coords_res)), f"{sample_name}_gt_edge_{128*2**i}res_wooffset")
+                        # self._save_voxel_ply(decoded_results[i]['edge']['coords'].float() / (128*2**i), torch.zeros(len(pred_edge_coords_res)), f"{sample_name}_pred_edge_{128*2**i}res_wooffset")
+                    else:
+                        idx = i
+                        pred_edge_coords_res = decoded_results[i]['edge']['coords'].float()
+                        # gt_edge_coords_res = batch_data[gt_edge_key][:, 1:].float().to(self.device)
+                        gt_edge_coords_res = gt_edge_voxels_list[idx][:, 1:].float().to(self.device)
+                    # self._save_voxel_ply(gt_edge_coords_res / (128*2**i), torch.zeros(len(gt_edge_coords_res)), f"{sample_name}_gt_edge_{128*2**i}res")
+                    # self._save_voxel_ply(pred_edge_coords_res / (128*2**i), torch.zeros(len(pred_edge_coords_res)), f"{sample_name}_pred_edge_{128*2**i}res")
+                    e_metrics = self._compute_vertex_metrics(pred_edge_coords_res, gt_edge_coords_res, threshold=threshold)
+                    per_sample_metrics['edge'][res].append({
+                        'recall': e_metrics['recall'],
+                        'precision': e_metrics['precision'],
+                        'f1': e_metrics['f1'],
+                        'num_pred': len(pred_edge_coords_res),
+                        'num_gt': len(gt_edge_coords_res)
+                    })
+                    avg_metrics['edge'][res]['recall'].append(e_metrics['recall'])
+                    avg_metrics['edge'][res]['precision'].append(e_metrics['precision'])
+                    avg_metrics['edge'][res]['f1'].append(e_metrics['f1'])
+        avg_metrics_processed = {}
+        for category, res_dict in avg_metrics.items():
+            avg_metrics_processed[category] = {}
+            for resolution, metric_dict in res_dict.items():
+                avg_metrics_processed[category][resolution] = {
+                    metric_name: np.mean(values) if values else float('nan')
+                    for metric_name, values in metric_dict.items()
+                }
+        result_data = {
+            'config': self.config,
+            'checkpoint': self.ckpt_path,
+            'dataset': self.dataset_path,
+            'num_samples': eval_samples,
+            'per_sample_metrics': per_sample_metrics,
+            'avg_metrics': avg_metrics_processed
+        }
+        results_file_path = os.path.join(self.result_dir, f"evaluation_results_epoch{self.epoch}.yaml")
+        with open(results_file_path, 'w') as f:
+            yaml.dump(result_data, f, default_flow_style=False)
+        return result_data
+    def _generate_line_voxels(
+        self,
+        p1: torch.Tensor,
+        p2: torch.Tensor
+    ) -> Tuple[
+        List[Tuple[int, int, int]],
+        List[Tuple[torch.Tensor, torch.Tensor]],
+        List[np.ndarray]
+    ]:
+        """
+        Improved version using better sampling strategy
+        """
+        p1_np = p1 #.cpu().numpy()
+        p2_np = p2 #.cpu().numpy()
+        voxel_dict = OrderedDict()
+        # Use proper 3D line voxelization algorithm
+        def bresenham_3d(p1, p2):
+            """3D Bresenham's line algorithm"""
+            x1, y1, z1 = np.round(p1).astype(int)
+            x2, y2, z2 = np.round(p2).astype(int)
+            points = []
+            dx = abs(x2 - x1)
+            dy = abs(y2 - y1)
+            dz = abs(z2 - z1)
+            xs = 1 if x2 > x1 else -1
+            ys = 1 if y2 > y1 else -1
+            zs = 1 if z2 > z1 else -1
+            # Driving axis is X
+            if dx >= dy and dx >= dz:
+                err_1 = 2 * dy - dx
+                err_2 = 2 * dz - dx
+                for i in range(dx + 1):
+                    points.append((x1, y1, z1))
+                    if err_1 > 0:
+                        y1 += ys
+                        err_1 -= 2 * dx
+                    if err_2 > 0:
+                        z1 += zs
+                        err_2 -= 2 * dx
+                    err_1 += 2 * dy
+                    err_2 += 2 * dz
+                    x1 += xs
+            # Driving axis is Y
+            elif dy >= dx and dy >= dz:
+                err_1 = 2 * dx - dy
+                err_2 = 2 * dz - dy
+                for i in range(dy + 1):
+                    points.append((x1, y1, z1))
+                    if err_1 > 0:
+                        x1 += xs
+                        err_1 -= 2 * dy
+                    if err_2 > 0:
+                        z1 += zs
+                        err_2 -= 2 * dy
+                    err_1 += 2 * dx
+                    err_2 += 2 * dz
+                    y1 += ys
+            # Driving axis is Z
+            else:
+                err_1 = 2 * dx - dz
+                err_2 = 2 * dy - dz
+                for i in range(dz + 1):
+                    points.append((x1, y1, z1))
+                    if err_1 > 0:
+                        x1 += xs
+                        err_1 -= 2 * dz
+                    if err_2 > 0:
+                        y1 += ys
+                        err_2 -= 2 * dz
+                    err_1 += 2 * dx
+                    err_2 += 2 * dy
+                    z1 += zs
+            return points
+        # Get all voxels using Bresenham algorithm
+        voxel_coords = bresenham_3d(p1_np, p2_np)
+        # Add all voxels to dictionary
+        for coord in voxel_coords:
+            voxel_dict[tuple(coord)] = (p1, p2)
+        voxel_coords = list(voxel_dict.keys())
+        endpoint_pairs = list(voxel_dict.values())
+        # --- compute error vectors ---
+        error_vectors = []
+        diff = p2_np - p1_np
+        d_norm_sq = np.dot(diff, diff)
+        for v in voxel_coords:
+            v_center = np.array(v, dtype=float) + 0.5
+            if d_norm_sq == 0:  # degenerate line
+                closest = p1_np
+            else:
+                t = np.dot(v_center - p1_np, diff) / d_norm_sq
+                t = np.clip(t, 0.0, 1.0)
+                closest = p1_np + t * diff
+            error_vectors.append(v_center - closest)
+        return voxel_coords, endpoint_pairs, error_vectors
# Usage example
def set_seed(seed: int):
    """Seed every random number generator used by this script.

    Seeds Python's ``random`` module, NumPy and PyTorch (plus the CUDA
    generators when CUDA is available), then sets cuDNN to deterministic mode
    with benchmarking turned off.

    Args:
        seed: Seed value passed to every generator.
    """
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False
def _checkpoint_epoch_label(ckpt_path):
    """Extract the epoch label from a checkpoint file name.

    For a name such as ``checkpoint_epoch13_batch6000_loss0.1381.pt`` the
    second underscore-separated token (``epoch13``) is taken and its ``epoch``
    prefix is removed, giving ``13``. A name without an underscore falls back
    to the file name without its extension instead of raising ``IndexError``.
    """
    file_name = os.path.basename(ckpt_path)
    tokens = file_name.split('_')
    if len(tokens) > 1:
        label = tokens[1].split('.')[0]
        if label.startswith('epoch'):
            label = label[len('epoch'):]
        if label:
            return label
    return os.path.splitext(file_name)[0]


def _write_metric_line(f, indent, key, value):
    """Write one ``key: value`` line, using 4 decimals for plain numbers."""
    if isinstance(value, (int, float)):
        f.write(f"{indent}{str(key).ljust(15)}: {value:.4f}\n")
    else:
        f.write(f"{indent}{str(key).ljust(15)}: {str(value)}\n")


def _write_prediction_table(f, title, resolutions, metrics_per_res):
    """Write the per-resolution recall/precision/F1 table of one sample."""
    f.write(f"[{title}]\n")
    f.write(f"  {'Resolution'.ljust(10)} {'Recall'.ljust(8)} {'Precision'.ljust(8)} {'F1'.ljust(8)} {'Pred/Gt'.ljust(10)}\n")
    for res, metrics in zip(resolutions, metrics_per_res):
        f.write(f"  {str(res).ljust(10)} "
                f"{metrics['recall']:.4f}    "
                f"{metrics['precision']:.4f}    "
                f"{metrics['f1']:.4f}    "
                f"{metrics['num_pred']}/{metrics['num_gt']}\n")


def evaluate_checkpoint(ckpt_path, dataset_path, eval_dir):
    """Evaluate one checkpoint and write a plain-text summary report.

    Seeds the RNGs with 42, builds a ``Tester`` for the checkpoint and runs
    ``Tester.evaluate`` with the module-level settings ``NUM_SAMPLES``,
    ``VISUALIZE``, ``CHAMFER_EDGE_THRESHOLD`` and ``THRESHOLD``. The report is
    written to ``eval_dir`` and contains the averaged metrics followed by the
    per-sample vertex and edge tables for resolutions 128 and 256.

    Args:
        ckpt_path: Path of the checkpoint file to evaluate.
        dataset_path: Path of the evaluation dataset; its last path component
            is used in the report name.
        eval_dir: Existing directory that receives the summary file.

    Returns:
        The result dictionary returned by ``Tester.evaluate``.
    """
    set_seed(42)
    tester = Tester(ckpt_path=ckpt_path, dataset_path=dataset_path)
    result_data = tester.evaluate(num_samples=NUM_SAMPLES, visualize=VISUALIZE, chamfer_threshold=CHAMFER_EDGE_THRESHOLD, threshold=THRESHOLD)

    # Build the report name. The "epoch" prefix is stripped from the parsed
    # token so the file is named "epoch13_..." rather than "epochepoch13_...".
    epoch_str = _checkpoint_epoch_label(ckpt_path)
    dataset_name = os.path.basename(os.path.normpath(dataset_path))
    summary_path = os.path.join(eval_dir, f"epoch{epoch_str}_{dataset_name}_summary_threshold{THRESHOLD}_one2one.txt")

    resolutions = [128, 256]
    per_sample = result_data['per_sample_metrics']

    # The report contains a non-ASCII bullet, so the encoding is set
    # explicitly rather than inherited from the locale.
    with open(summary_path, 'w', encoding='utf-8') as f:
        # Header
        f.write(f"Checkpoint: {os.path.basename(ckpt_path)}\n")
        f.write(f"Dataset: {dataset_name}\n")
        f.write(f"Evaluation Samples: {result_data['num_samples']}\n\n")

        # Averaged metrics: each category holds either per-resolution metric
        # dictionaries or flat metric values.
        f.write("=== Average Metrics ===\n")
        for category, data in result_data['avg_metrics'].items():
            f.write(f"\n{category.upper()}:\n")
            for key, value in data.items():
                if isinstance(value, dict):
                    f.write(f"  Resolution {key}:\n")
                    for metric_name, metric_value in value.items():
                        _write_metric_line(f, "    ", metric_name, metric_value)
                else:
                    _write_metric_line(f, "  ", key, value)

        # Per-sample details. zip() stops at the shortest list, so a sample
        # missing at one resolution truncates the listing.
        f.write("\n\n=== Detailed Per-Sample Metrics ===\n")
        for name, vertex_metrics, edge_metrics in zip(
            per_sample['sample_names'],
            zip(*[per_sample['vertex'][res] for res in resolutions]),
            zip(*[per_sample['edge'][res] for res in resolutions])
        ):
            f.write(f"\n◆ Sample: {name}\n")
            _write_prediction_table(f, "Vertex Prediction", resolutions, vertex_metrics)
            _write_prediction_table(f, "Edge Prediction", resolutions, edge_metrics)
            f.write("-"*60 + "\n")

    print(f"Saved summary to: {summary_path}")
    return result_data
if __name__ == '__main__':
    # The whole evaluation runs inside a bfloat16 CUDA autocast context.
    with torch.cuda.amp.autocast(dtype=torch.bfloat16):
        # These names are module globals: evaluate_checkpoint() reads
        # NUM_SAMPLES, VISUALIZE, CHAMFER_EDGE_THRESHOLD and THRESHOLD.
        evaluate_all_checkpoints = True  # True: evaluate every checkpoint whose epoch is in the range below
        EPOCH_START, EPOCH_END = 12, 12
        CHAMFER_EDGE_THRESHOLD = 0.5
        NUM_SAMPLES = 50
        VISUALIZE = True
        THRESHOLD = 1.5
        VISUAL_FIELD = False

        ckpt_path = '/gemini/user/private/zhaotianhao/checkpoints/vae/train_9w_200_2000face/shapenet_bs2_128to256_dir_sorted_dora_head_small/checkpoint_epoch13_batch6000_loss0.1381.pt'
        dataset_path = '/gemini/user/private/zhaotianhao/data/test_mesh'

        # Render directory is only set for the objaverse_200_2000 mesh set.
        RENDERS_DIR = (
            '/HOME/paratera_xy/pxy1054/HDD_POOL/Trisf/data/mesh_render_img/objaverse_200_2000/renders_cond'
            if dataset_path == '/HOME/paratera_xy/pxy1054/HDD_POOL/Trisf/data/mesh/objaverse_200_2000'
            else ''
        )

        ckpt_dir = os.path.dirname(ckpt_path)
        eval_dir = os.path.join(ckpt_dir, "evaluate")
        os.makedirs(eval_dir, exist_ok=True)

        # Disabled latent-space PCA visualization, kept for manual debugging.
        if False:
            for i in range(NUM_SAMPLES):
                print("--- Starting Latent Space PCA Visualization ---")
                tester = Tester(ckpt_path=ckpt_path, dataset_path=dataset_path)
                tester.visualize_latent_space_pca(sample_idx=i)
                print("--- PCA Visualization Finished ---")

        if evaluate_all_checkpoints:
            # Select checkpoints in ckpt_dir named "<prefix>_epoch<N>_..."
            # whose epoch N lies in [EPOCH_START, EPOCH_END].
            selected_ckpts = []
            for name in sorted(n for n in os.listdir(ckpt_dir) if n.endswith('.pt')):
                try:
                    epoch = int(name.split('_')[1].replace('epoch', ''))
                except Exception as e:
                    print(f"Warning: Could not parse epoch from {name}: {e}")
                    continue
                if EPOCH_START <= epoch <= EPOCH_END:
                    selected_ckpts.append(name)

            for name in selected_ckpts:
                evaluate_checkpoint(os.path.join(ckpt_dir, name), dataset_path, eval_dir)
        else:
            evaluate_checkpoint(ckpt_path, dataset_path, eval_dir)

test_slat_vae_128to512_pointnet_vae_head.py ADDED Viewed

	@@ -0,0 +1,1636 @@

+import os
+import yaml
+import torch
+import numpy as np
+import random
+from tqdm import tqdm
+from collections import defaultdict
+import plotly.graph_objects as go
+from plotly.subplots import make_subplots
+from torch.utils.data import DataLoader, Subset
+from triposf.modules.sparse.basic import SparseTensor
+from triposf.models.triposf_vae.VoxelFeatureVAE_edge_woself_128to1024_decoder_head import VoxelVAE
+from vertex_encoder import VoxelFeatureEncoder_edge, VoxelFeatureEncoder_vtx, VoxelFeatureEncoder_active, VoxelFeatureEncoder_active_pointnet, ConnectionHead
+from utils import load_pretrained_woself
+from dataset_triposf_head import VoxelVertexDataset_edge, collate_fn_pointnet
+from functools import partial
+import itertools
+from typing import List, Tuple, Set
+from collections import OrderedDict
+from scipy.spatial import cKDTree
+from sklearn.neighbors import KDTree
+import trimesh
+import torch
+import torch.nn.functional as F
+import time
+from sklearn.decomposition import PCA
+import matplotlib.pyplot as plt
+import networkx as nx
def predict_mesh_connectivity(
    connection_head,
    vtx_feats,
    vtx_coords,
    batch_size=10000,
    threshold=0.5,
    k_neighbors=64,
    device='cuda'
):
    """Predict which vertex pairs are connected by an edge.

    Candidate pairs are scored by ``connection_head`` on the concatenated
    features of both vertices, evaluated in both orders and averaged so the
    score is symmetric. Pairs whose sigmoid probability exceeds ``threshold``
    are returned.

    Args:
        connection_head: Module called on ``[B, 2C]`` concatenated pair
            features; it is switched to eval mode.
        vtx_feats: ``[N, C]`` vertex features.
        vtx_coords: ``[N, 3]`` vertex coordinates, used only to pick KNN
            candidates.
        batch_size: Number of candidate pairs scored per forward pass.
        threshold: Probability above which a pair counts as connected.
        k_neighbors: Number of nearest neighbors tested per vertex. ``None``,
            a non-positive value, or a value >= N tests all N*(N-1)/2 pairs.
        device: Device on which candidates are built and scored.

    Returns:
        ``np.ndarray`` of shape ``[E, 2]`` holding vertex index pairs with
        ``u < v``. The array has shape ``(0, 2)`` when nothing is connected or
        when there are fewer than 3 vertices.
    """
    no_edges = np.empty((0, 2), dtype=np.int64)
    num_verts = vtx_feats.shape[0]
    if num_verts < 3:
        # Too few vertices for a triangle. Return the same array type as the
        # normal path so callers can use len() and iterate uniformly.
        return no_edges

    connection_head.eval()
    vtx_feats = vtx_feats.to(device)

    # --- 1. Candidate edges ---
    if k_neighbors is not None and 0 < k_neighbors < num_verts:
        # Strategy A: local KNN. The dense [N, N] distance matrix is fine for
        # moderate N; very large N would need a KD-tree or faiss instead.
        coords = vtx_coords.to(device).float()
        dist_mat = torch.cdist(coords, coords)
        # k + 1 smallest distances, because each vertex is normally its own
        # nearest neighbor.
        _, nbr = torch.topk(dist_mat, k=k_neighbors + 1, dim=1, largest=False)
        src = torch.arange(num_verts, device=nbr.device).unsqueeze(1).expand_as(nbr)
        pairs = torch.stack([src.reshape(-1), nbr.reshape(-1)], dim=1)
        pairs = pairs[pairs[:, 0] != pairs[:, 1]]  # drop self-pairs
        # KNN is not symmetric: v may be a neighbor of u without u being a
        # neighbor of v. Canonicalize each pair to (min, max) and deduplicate,
        # so such pairs are kept exactly once rather than dropped.
        pairs = torch.unique(torch.sort(pairs, dim=1).values, dim=0)
        u_indices, v_indices = pairs[:, 0], pairs[:, 1]
    else:
        # Strategy B: all pairs, O(N^2). Only sensible for small N.
        u_indices, v_indices = torch.triu_indices(num_verts, num_verts, offset=1, device=device)

    # --- 2. Batched inference ---
    prob_chunks = []
    num_candidates = u_indices.shape[0]
    with torch.no_grad():
        for start in range(0, num_candidates, batch_size):
            stop = min(start + batch_size, num_candidates)
            feat_u = vtx_feats[u_indices[start:stop]]
            feat_v = vtx_feats[v_indices[start:stop]]
            # Symmetric forward: score (u, v) and (v, u), then average.
            logits_uv = connection_head(torch.cat([feat_u, feat_v], dim=-1))
            logits_vu = connection_head(torch.cat([feat_v, feat_u], dim=-1))
            logits = (logits_uv + logits_vu) / 2.
            # reshape(-1) keeps a 1-D result even for a single candidate.
            prob_chunks.append(torch.sigmoid(logits).reshape(-1))

    if not prob_chunks:
        return no_edges
    all_probs = torch.cat(prob_chunks)  # [M]

    # --- 3. Keep the connected pairs ---
    connected_mask = all_probs > threshold
    final_u = u_indices[connected_mask].cpu().numpy()
    final_v = v_indices[connected_mask].cpu().numpy()
    return np.stack([final_u, final_v], axis=1)  # [E, 2]
def build_triangles_from_edges(edges, num_verts):
    """Build triangles from an undirected edge list.

    A triangle is emitted for every three vertices that are pairwise
    connected (a 3-clique). For each edge ``(u, v)`` the common neighbors
    ``w`` of ``u`` and ``v`` are looked up; requiring ``u < v < w`` makes each
    triangle appear exactly once.

    Args:
        edges: Iterable of ``(u, v)`` vertex index pairs, in either order.
            Duplicate edges and self-loops are ignored.
        num_verts: Total number of vertices. Kept for interface
            compatibility; adjacency is built from the edges themselves.

    Returns:
        Integer ``np.ndarray`` of shape ``[T, 3]`` with rows ``u < v < w``.
        The shape is ``(0, 3)`` when there are no triangles.
    """
    if len(edges) == 0:
        return np.empty((0, 3), dtype=int)

    adjacency = {}
    seen = set()
    ordered_edges = []
    for a, b in edges:
        a, b = int(a), int(b)
        if a == b:
            continue  # a self-loop cannot be part of a triangle
        if a > b:
            a, b = b, a
        if (a, b) in seen:
            continue  # a repeated edge would otherwise repeat its triangles
        seen.add((a, b))
        ordered_edges.append((a, b))
        adjacency.setdefault(a, set()).add(b)
        adjacency.setdefault(b, set()).add(a)

    triangles = [
        [u, v, w]
        for u, v in ordered_edges
        for w in sorted(adjacency[u] & adjacency[v])
        if w > v
    ]
    # reshape keeps the (0, 3) shape when no triangle was found.
    return np.array(triangles, dtype=int).reshape(-1, 3)
def downsample_voxels(
    voxels: torch.Tensor,
    input_resolution: int,
    output_resolution: int
) -> torch.Tensor:
    """Map voxel coordinates to a coarser grid and drop duplicate rows.

    Column 0 of ``voxels`` is left unchanged; the remaining columns are
    floor-divided by ``input_resolution // output_resolution``. The input
    tensor is not modified.

    Args:
        voxels: 2-D tensor of voxel rows.
        input_resolution: Grid resolution of ``voxels``.
        output_resolution: Target resolution; must divide ``input_resolution``.

    Returns:
        ``torch.long`` tensor of the unique downsampled rows.

    Raises:
        ValueError: If ``input_resolution`` is not a multiple of
            ``output_resolution``.
    """
    factor, remainder = divmod(input_resolution, output_resolution)
    if remainder != 0:
        raise ValueError(f"input_resolution ({input_resolution}) must be divisible "
                         f"by output_resolution ({output_resolution}).")

    coarse = voxels.to(torch.long, copy=True)
    coarse[:, 1:] = coarse[:, 1:] // factor
    return torch.unique(coarse, dim=0)
def visualize_colored_points_ply(coords, vectors, filename):
    """Save a point cloud as a PLY file, colored by per-point direction vectors.

    Vector components are mapped linearly from [-1, 1] to [0, 255] (values
    outside that range are clipped) and used as the color of each point.
    If either input is empty, a warning is printed and no file is written.

    Args:
        coords (torch.Tensor or np.ndarray): Point coordinates, expected
            shape (N, 3).
        vectors (torch.Tensor or np.ndarray): Direction vectors, expected
            shape (N, 3).
        filename (str): Output path; the file is exported in PLY format.
    """
    # Work on NumPy arrays. Vector tensors are cast to float32 first.
    points_np = coords.detach().cpu().numpy() if isinstance(coords, torch.Tensor) else coords
    if isinstance(vectors, torch.Tensor):
        dirs_np = vectors.detach().cpu().to(torch.float32).numpy()
    else:
        dirs_np = vectors

    # Nothing to export for empty input.
    if 0 in (points_np.size, dirs_np.size):
        print(f"警告：输入数据为空，未生成 {filename} 文件。")
        return

    # [-1, 1] -> [0, 2] -> [0, 255]; clip guards against out-of-range values.
    rgb = np.clip((dirs_np + 1) * 127.5, 0, 255).astype(np.uint8)

    cloud = trimesh.points.PointCloud(points_np, colors=rgb)
    cloud.export(filename, file_type='ply')
    print(f"可视化文件已成功保存为: {filename}")
def compute_vertex_matching(pred_coords, gt_coords, threshold=1.0):
    """Count ground-truth points matched by a nearby predicted point.

    Both point sets are converted to float32 and deduplicated row-wise. Each
    predicted point is assigned to its single nearest ground-truth point, and
    a ground-truth point counts as matched when at least one prediction
    assigned to it lies within ``threshold``. Every ground-truth point is
    counted at most once. A prediction whose nearest ground-truth point is
    already taken is not re-assigned to its second-nearest one.

    Args:
        pred_coords: Predicted coordinates, one point per row (tensor or
            array-like).
        gt_coords: Ground-truth coordinates, one point per row.
        threshold: Maximum Euclidean distance for a match.

    Returns:
        ``(matches, match_rate, pred_total, gt_total)`` where ``match_rate``
        is ``matches / gt_total`` and the totals count unique points. When
        either set is empty, ``matches`` is 0 and ``match_rate`` is 0.0.
    """
    def _unique_rows(coords):
        if isinstance(coords, torch.Tensor):
            rows = coords.detach().to(torch.float32).cpu().numpy()
        else:
            rows = np.asarray(coords, dtype=np.float32)
        # Skip np.unique for empty input so the row layout is preserved.
        return np.unique(rows, axis=0) if len(rows) else rows

    print('len(pred_coords)', len(pred_coords))
    pred_array = _unique_rows(pred_coords)
    gt_array = _unique_rows(gt_coords)
    print('len(pred_array)', len(pred_array))
    pred_total = len(pred_array)
    gt_total = len(gt_array)

    # Nothing to match.
    if pred_total == 0 or gt_total == 0:
        return 0, 0.0, pred_total, gt_total

    # Nearest ground-truth point for every prediction. cKDTree.query returns
    # flat arrays for k=1, so a single prediction needs no special handling.
    dist, nearest = cKDTree(gt_array).query(pred_array, k=1)
    dist = np.atleast_1d(dist)
    nearest = np.atleast_1d(nearest)

    # Each ground-truth index is counted once, however many predictions hit it.
    matches = int(np.unique(nearest[dist <= threshold]).size)
    match_rate = matches / max(gt_total, 1)
    return matches, match_rate, pred_total, gt_total
+def flatten_coords_4d(coords_4d: torch.Tensor):
+    coords_4d_long = coords_4d.long()
+    base_x = 512
+    base_y = 512 * 512
+    base_z = 512 * 512 * 512
+    flat_coords = coords_4d_long[:, 0] * base_z + \
+                  coords_4d_long[:, 1] * base_y + \
+                  coords_4d_long[:, 2] * base_x + \
+                  coords_4d_long[:, 3]
+    return flat_coords
+class Tester:
+    def __init__(self, ckpt_path, config_path=None, dataset_path=None):
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self.ckpt_path = ckpt_path
+        self.config = self._load_config(config_path)
+        self.dataset_path = dataset_path # or self.config['dataset']['path']
+        checkpoint = torch.load(self.ckpt_path, map_location='cpu')
+        self.epoch = checkpoint.get('epoch', 0)
+        self._init_models()
+        self._init_dataset()
+        self.result_dir = os.path.join(os.path.dirname(ckpt_path), "evaluation_results")
+        os.makedirs(self.result_dir, exist_ok=True)
+        dataset_name_clean = os.path.basename(self.dataset_path).replace('.npz', '').replace('.npy', '')
+        self.output_voxel_dir = os.path.join(os.path.dirname(ckpt_path),
+                                             f"epoch_{self.epoch}_{dataset_name_clean}_voxels_0_gs")
+        os.makedirs(self.output_voxel_dir, exist_ok=True)
+        self.output_obj_dir = os.path.join(os.path.dirname(ckpt_path),
+                                           f"epoch_{self.epoch}_{dataset_name_clean}_obj_0_gs")
+        os.makedirs(self.output_obj_dir, exist_ok=True)
+    def _save_logit_visualization(self, dense_vol, name, sample_name, ply_threshold=0.01):
+        """
+        保存 Logit 的 3D .npy 文件、2D 最大投影热力图，以及带颜色和透明度的 3D .ply 点云
+        Args:
+            dense_vol: (H, W, D) numpy array, values in [0, 1]
+            name: str (e.g., "edge" or "vertex")
+            sample_name: str
+            ply_threshold: float, 只有概率大于此值的点才会被保存
+        """
+        # 1. 保存原始 Dense 数据 (可选)
+        npy_path = os.path.join(self.output_voxel_dir, f"{sample_name}_{name}_logits.npy")
+        # np.save(npy_path, dense_vol)
+        # 2. 生成 2D 投影热力图 (保持不变)
+        proj_x = np.max(dense_vol, axis=0)
+        proj_y = np.max(dense_vol, axis=1)
+        proj_z = np.max(dense_vol, axis=2)
+        fig, axes = plt.subplots(1, 3, figsize=(15, 5))
+        im0 = axes[0].imshow(proj_x, cmap='turbo', vmin=0, vmax=1, origin='lower')
+        axes[0].set_title(f"{name} Max-Proj (YZ)")
+        im1 = axes[1].imshow(proj_y, cmap='turbo', vmin=0, vmax=1, origin='lower')
+        axes[1].set_title(f"{name} Max-Proj (XZ)")
+        im2 = axes[2].imshow(proj_z, cmap='turbo', vmin=0, vmax=1, origin='lower')
+        axes[2].set_title(f"{name} Max-Proj (XY)")
+        fig.colorbar(im2, ax=axes, orientation='vertical', fraction=0.02, pad=0.04)
+        plt.suptitle(f"{sample_name} - {name} Occupancy Probability")
+        png_path = os.path.join(self.output_voxel_dir, f"{sample_name}_{name}_heatmap.png")
+        plt.savefig(png_path, dpi=150)
+        plt.close(fig)
+        # ------------------------------------------------------------------
+        # 3. 保存为带颜色和透明度(RGBA)的 PLY 点云
+        # ------------------------------------------------------------------
+        # 筛选出概率大于阈值的点坐标
+        indices = np.argwhere(dense_vol > ply_threshold)
+        if len(indices) > 0:
+            # 获取这些点的概率值 [0, 1]
+            values = dense_vol[indices[:, 0], indices[:, 1], indices[:, 2]]
+            # 使用 matplotlib 的 colormap 进行颜色映射
+            import matplotlib.cm as cm
+            cmap = cm.get_cmap('turbo')
+            # map values [0, 1] to RGBA [0, 1] (N, 4)
+            colors_float = cmap(values)
+            # -------------------------------------------------------
+            # 【核心修改】：修改 Alpha 通道 (透明度)
+            # -------------------------------------------------------
+            # 让透明度直接等于概率值。
+            # 概率 1.0 -> Alpha 1.0 (完全不透明/颜色深)
+            # 概率 0.1 -> Alpha 0.1 (非常透明/颜色浅)
+            colors_float[:, 3] = values
+            # 转换为 uint8 [0, 255]，保留 4 个通道 (R, G, B, A)
+            colors_uint8 = (colors_float * 255).astype(np.uint8)
+            # 坐标转换
+            vertices = indices
+            ply_filename = f"{sample_name}_{name}_logits_colored.ply"
+            ply_save_path = os.path.join(self.output_voxel_dir, ply_filename)
+            try:
+                # 使用 Trimesh 保存 (Trimesh 支持 (N, 4) 的 colors)
+                pcd = trimesh.points.PointCloud(vertices=vertices, colors=colors_uint8)
+                pcd.export(ply_save_path)
+                print(f"Saved colored RGBA logit PLY to {ply_save_path}")
+            except Exception as e:
+                print(f"Failed to save PLY with trimesh: {e}")
+                # Fallback: 手动写入 PLY (需要添加 alpha 属性)
+                with open(ply_save_path, 'w') as f:
+                    f.write("ply\n")
+                    f.write("format ascii 1.0\n")
+                    f.write(f"element vertex {len(vertices)}\n")
+                    f.write("property float x\n")
+                    f.write("property float y\n")
+                    f.write("property float z\n")
+                    f.write("property uchar red\n")
+                    f.write("property uchar green\n")
+                    f.write("property uchar blue\n")
+                    f.write("property uchar alpha\n") # 新增 Alpha 属性
+                    f.write("end_header\n")
+                    for i in range(len(vertices)):
+                        v = vertices[i]
+                        c = colors_uint8[i] # c is now (R, G, B, A)
+                        f.write(f"{v[0]} {v[1]} {v[2]} {c[0]} {c[1]} {c[2]} {c[3]}\n")
+    def _point_line_segment_distance(self, px, py, pz, x1, y1, z1, x2, y2, z2):
+        """
+        计算点 (px,py,pz) 到线段 (x1,y1,z1)-(x2,y2,z2) 的最短距离的平方。
+        全部输入为 Tensor，支持广播。
+        """
+        # 线段向量 AB
+        ABx = x2 - x1
+        ABy = y2 - y1
+        ABz = z2 - z1
+        # 向量 AP
+        APx = px - x1
+        APy = py - y1
+        APz = pz - z1
+        # AB 的长度平方
+        AB_sq = ABx**2 + ABy**2 + ABz**2
+        # 避免除以0 (如果两端点重合)
+        AB_sq = torch.clamp(AB_sq, min=1e-6)
+        # 投影系数 t = (AP · AB) / |AB|^2
+        t = (APx * ABx + APy * ABy + APz * ABz) / AB_sq
+        # 限制 t 在 [0, 1] 之间（线段约束）
+        t = torch.clamp(t, 0.0, 1.0)
+        # 最近点 (Projection)
+        closestX = x1 + t * ABx
+        closestY = y1 + t * ABy
+        closestZ = z1 + t * ABz
+        # 距离平方
+        dx = px - closestX
+        dy = py - closestY
+        dz = pz - closestZ
+        return dx**2 + dy**2 + dz**2
+    def _extract_mesh_projection_based(
+        self,
+        vtx_result: dict,
+        edge_result: dict,
+        resolution: int = 1024,
+        vtx_prob_threshold: float = 0.5,
+        # --- parameters of the projection-based edge test ---
+        search_radius: float = 128.0,      # 1. maximum length of a candidate edge
+        project_dist_thresh: float = 1.5,  # 2. projection distance threshold (tube radius, in voxels)
+        dir_align_threshold: float = 0.6,  # 3. direction similarity threshold (cos theta)
+        connect_ratio_threshold: float = 0.4, # 4. final connection threshold (matched voxels / theoretical length)
+        edge_prob_threshold: float = 0.1,  # only used to select the edge voxels that "exist"
+    ):
+        """
+        Connect predicted vertices into edges by checking edge-voxel support along each candidate segment.
+        Vertices are the rows of ``vtx_result['coords_4d'][:, 1:]`` whose sigmoid
+        of ``occ_probs[:, 0]`` exceeds ``vtx_prob_threshold``; edge voxels are
+        selected the same way from ``edge_result`` with ``edge_prob_threshold``.
+        Every vertex pair closer than ``search_radius`` is a candidate. A candidate
+        is accepted when the number of edge voxels lying within
+        ``project_dist_thresh`` of the segment and whose predicted direction has
+        ``|cos| > dir_align_threshold`` with it, divided by ``max(length, 1)``,
+        exceeds ``connect_ratio_threshold``.
+        Returns:
+            ``(vertices, edges)``: the selected vertex coordinates divided by
+            ``resolution`` (numpy array) and a list of ``[i, j]`` index pairs into
+            that array. Returns ``([], [])`` when no edge voxel passes the
+            threshold, and ``(vertices, [])`` when fewer than two vertices remain.
+        NOTE(review): when ``edge_result`` has no 'predicted_direction_feats', the
+        directions are all zeros, so every alignment score is 0 and, with a
+        positive ``dir_align_threshold``, no edge can be accepted.
+        NOTE(review): the first column of ``coords_4d`` is dropped; presumably it
+        is a batch index -- confirm against the producer of these dicts.
+        """
+        t_start = time.perf_counter()
+        # ---------------------------------------------------------------------
+        # 1. Global data: collect all "alive" edge voxels (treated as a point cloud)
+        # ---------------------------------------------------------------------
+        e_probs = torch.sigmoid(edge_result['occ_probs'][:, 0])
+        e_coords = edge_result['coords_4d'][:, 1:].float() # (N, 3)
+        # Fetch the predicted direction vectors
+        if 'predicted_direction_feats' in edge_result:
+            e_dirs = edge_result['predicted_direction_feats'] # (N, 3)
+            # Normalise the directions to unit length
+            e_dirs = F.normalize(e_dirs, p=2, dim=1)
+        else:
+            print("Warning: No direction features, using dummy.")
+            e_dirs = torch.zeros_like(e_coords)
+        # Keep the valid edge voxels (the global point cloud)
+        valid_mask = e_probs > edge_prob_threshold
+        cloud_coords = e_coords[valid_mask] # (M, 3)
+        cloud_dirs = e_dirs[valid_mask]     # (M, 3)
+        num_cloud = cloud_coords.shape[0]
+        print(f"[Projection] Global active edge voxels: {num_cloud}")
+        if num_cloud == 0:
+             return [], []
+        # ---------------------------------------------------------------------
+        # 2. Vertices and candidate edges
+        # ---------------------------------------------------------------------
+        v_probs = torch.sigmoid(vtx_result['occ_probs'][:, 0])
+        v_coords = vtx_result['coords_4d'][:, 1:].float()
+        v_mask = v_probs > vtx_prob_threshold
+        valid_v_coords = v_coords[v_mask] # (V, 3)
+        if valid_v_coords.shape[0] < 2:
+            return valid_v_coords.cpu().numpy() / resolution, []
+        # Enumerate candidate edges with a coarse distance filter; the strict
+        # upper triangle keeps each unordered pair once and excludes self-pairs.
+        dists = torch.cdist(valid_v_coords, valid_v_coords)
+        triu_mask = torch.triu(torch.ones_like(dists), diagonal=1).bool()
+        cand_mask = (dists < search_radius) & triu_mask
+        cand_indices = torch.nonzero(cand_mask, as_tuple=False) # (E_cand, 2)
+        p1s = valid_v_coords[cand_indices[:, 0]] # (E, 3)
+        p2s = valid_v_coords[cand_indices[:, 1]] # (E, 3)
+        num_candidates = p1s.shape[0]
+        print(f"[Projection] Checking {num_candidates} candidate pairs...")
+        # ---------------------------------------------------------------------
+        # 3. Loop over the candidate edges (bounding box used for fast culling)
+        # ---------------------------------------------------------------------
+        final_edges = []
+        # Precompute direction and length of every candidate edge
+        edge_vecs = p2s - p1s
+        edge_lengths = torch.norm(edge_vecs, dim=1)
+        edge_dirs = F.normalize(edge_vecs, p=2, dim=1)
+        # Author's design note: a full (edges x voxels) distance matrix risks
+        # running out of GPU memory, so the code loops over edges in Python and
+        # uses boolean masks to restrict each edge to nearby voxels; the tensor
+        # work inside the loop runs on whatever device the inputs live on.
+        # Split the global cloud per axis for the bounding-box tests below
+        cx, cy, cz = cloud_coords[:, 0], cloud_coords[:, 1], cloud_coords[:, 2]
+        # Author's note: this loop is the bottleneck; a hash grid could replace
+        # the bounding-box test. The author assumes at most ~50k edges and
+        # ~100k voxels.
+        # batch_size only slices the candidate list; the edges inside a slice
+        # are still processed one at a time by the inner loop.
+        batch_size = 128 # 128 edges per slice
+        for i in range(0, num_candidates, batch_size):
+            end = min(i + batch_size, num_candidates)
+            # Edge data of the current slice
+            b_p1 = p1s[i:end] # (B, 3)
+            b_p2 = p2s[i:end] # (B, 3)
+            b_dirs = edge_dirs[i:end] # (B, 3)
+            b_lens = edge_lengths[i:end] # (B,)
+            # --- Step A: projection & distance check ---
+            # Author's note: a dense (B, M) computation would be prone to OOM
+            # because M (number of voxels) can be large, hence the per-edge
+            # loop restricted to bounding-box neighbours.
+            # NOTE(review): current_edges_indices is not used afterwards in this method.
+            current_edges_indices = cand_indices[i:end]
+            for j in range(len(b_p1)):
+                # Process a single edge
+                p1 = b_p1[j]
+                p2 = b_p2[j]
+                e_dir = b_dirs[j]
+                e_len = b_lens[j].item()
+                # 1. Bounding-box filter (cheap, removes most voxels):
+                # keep the voxels inside the edge's axis-aligned box plus padding
+                padding = project_dist_thresh + 2.0
+                min_xyz = torch.min(p1, p2) - padding
+                max_xyz = torch.max(p1, p2) + padding
+                # Select with boolean masks
+                mask_x = (cx >= min_xyz[0]) & (cx <= max_xyz[0])
+                mask_y = (cy >= min_xyz[1]) & (cy <= max_xyz[1])
+                mask_z = (cz >= min_xyz[2]) & (cz <= max_xyz[2])
+                bbox_mask = mask_x & mask_y & mask_z
+                subset_coords = cloud_coords[bbox_mask]
+                subset_dirs = cloud_dirs[bbox_mask]
+                if subset_coords.shape[0] == 0:
+                    continue
+                # 2. Exact distance: squared distance from every voxel in the
+                # subset to the segment p1-p2
+                dist_sq = self._point_line_segment_distance(
+                    subset_coords[:, 0], subset_coords[:, 1], subset_coords[:, 2],
+                    p1[0], p1[1], p1[2],
+                    p2[0], p2[1], p2[2]
+                )
+                # 3. Distance threshold: keep voxels inside the tube (compared in squared form)
+                dist_mask = dist_sq < (project_dist_thresh ** 2)
+                # Directions of the voxels inside the tube
+                tube_dirs = subset_dirs[dist_mask]
+                if tube_dirs.shape[0] == 0:
+                    continue
+                # 4. Direction consistency: dot product (cos theta) between
+                # e_dir (3,) and tube_dirs (K, 3)
+                dot_prod = torch.matmul(tube_dirs, e_dir)
+                # abs() makes the test sign-agnostic: edges may be undirected or
+                # the network may predict the reverse direction. Drop abs() if
+                # the network strictly predicts an orientation.
+                dir_sim = torch.abs(dot_prod)
+                # Number of voxels whose direction agrees with the edge
+                valid_voxel_count = (dir_sim > dir_align_threshold).sum().item()
+                # 5. Ratio test: the expected number of voxels along the edge is
+                # approximated by its length (e_len); lengths below 1 are
+                # raised to 1 to avoid dividing by (almost) zero
+                theoretical_count = max(e_len, 1.0)
+                ratio = valid_voxel_count / theoretical_count
+                if ratio > connect_ratio_threshold:
+                    # Edge accepted
+                    global_idx = i + j
+                    edge_tuple = cand_indices[global_idx].cpu().numpy().tolist()
+                    final_edges.append(edge_tuple)
+        t_end = time.perf_counter()
+        print(f"[Projection] Logic finished. Accepted {len(final_edges)} edges. Time={t_end - t_start:.4f}s")
+        out_vertices = valid_v_coords.cpu().numpy() / resolution
+        return out_vertices, final_edges
+    def _save_voxel_ply(self, coords: torch.Tensor, labels: torch.Tensor, filename: str):
+        if coords.numel() == 0:
+            return
+        coords_np = coords.cpu().to(torch.float32).numpy()
+        labels_np = labels.cpu().to(torch.float32).numpy()
+        colors = np.zeros((coords_np.shape[0], 3), dtype=np.uint8)
+        colors[labels_np == 0] = [255, 0, 0]
+        colors[labels_np == 1] = [0, 0, 255]
+        try:
+            import trimesh
+            point_cloud = trimesh.PointCloud(vertices=coords_np, colors=colors)
+            ply_path = os.path.join(self.output_voxel_dir, f"{filename}.ply")
+            point_cloud.export(ply_path)
+        except ImportError:
+            ply_path = os.path.join(self.output_voxel_dir, f"{filename}.ply")
+            with open(ply_path, 'w') as f:
+                f.write("ply\n")
+                f.write("format ascii 1.0\n")
+                f.write(f"element vertex {coords_np.shape[0]}\n")
+                f.write("property float x\n")
+                f.write("property float y\n")
+                f.write("property float z\n")
+                f.write("property uchar red\n")
+                f.write("property uchar green\n")
+                f.write("property uchar blue\n")
+                f.write("end_header\n")
+                for i in range(coords_np.shape[0]):
+                    f.write(f"{coords_np[i,0]} {coords_np[i,1]} {coords_np[i,2]} {colors[i,0]} {colors[i,1]} {colors[i,2]}\n")
+    def _load_config(self, config_path=None):
+        if config_path and os.path.exists(config_path):
+            with open(config_path) as f:
+                return yaml.safe_load(f)
+        ckpt_dir = os.path.dirname(self.ckpt_path)
+        possible_configs = [
+            os.path.join(ckpt_dir, "config.yaml"),
+            os.path.join(os.path.dirname(ckpt_dir), "config.yaml")
+        ]
+        for config_file in possible_configs:
+            if os.path.exists(config_file):
+                with open(config_file) as f:
+                    print(f"Loaded config from: {config_file}")
+                    return yaml.safe_load(f)
+        checkpoint = torch.load(self.ckpt_path, map_location='cpu')
+        if 'config' in checkpoint:
+            print("Loaded config from checkpoint")
+            return checkpoint['config']
+        raise FileNotFoundError("Could not find config_edge.yaml in checkpoint directory or parent, and config not saved in checkpoint.")
+    def _init_models(self):
+        self.voxel_encoder = VoxelFeatureEncoder_active_pointnet(
+            in_channels=15,
+            hidden_dim=256,
+            out_channels=1024,
+            scatter_type='mean',
+            n_blocks=5,
+            resolution=128,
+        ).to(self.device)
+        self.connection_head = ConnectionHead(
+            channels=128 * 2,
+            out_channels=1,
+            mlp_ratio=4,
+        ).to(self.device)
+        self.vae = VoxelVAE( # abalation: VoxelVAE_1volume_dilation
+            in_channels=self.config['model']['in_channels'],
+            latent_dim=self.config['model']['latent_dim'],
+            encoder_blocks=self.config['model']['encoder_blocks'],
+            # decoder_blocks=self.config['model']['decoder_blocks'],
+            decoder_blocks_vtx=self.config['model']['decoder_blocks_vtx'],
+            decoder_blocks_edge=self.config['model']['decoder_blocks_edge'],
+            num_heads=8,
+            num_head_channels=64,
+            mlp_ratio=4.0,
+            attn_mode="swin",
+            window_size=8,
+            pe_mode="ape",
+            use_fp16=False,
+            use_checkpoint=False,
+            qk_rms_norm=False,
+            using_subdivide=True,
+            using_attn=self.config['model']['using_attn'],
+            attn_first=self.config['model'].get('attn_first', True),
+            pred_direction=self.config['model'].get('pred_direction', False),
+        ).to(self.device)
+        load_pretrained_woself(
+            checkpoint_path=self.ckpt_path,
+            voxel_encoder=self.voxel_encoder,
+            connection_head=self.connection_head,
+            vae=self.vae,
+        )
+        # --- 【新增】在这里添加权重检查逻辑 ---
+        print(f"--- 正在检查权重文件中的 NaN/Inf 值... ---")
+        has_nan_inf = False
+        if self._check_weights_for_nan_inf(self.vae, "VoxelVAE"):
+            has_nan_inf = True
+        if self._check_weights_for_nan_inf(self.voxel_encoder, "Vertex Encoder"):
+            has_nan_inf = True
+        if self._check_weights_for_nan_inf(self.connection_head, "Connection Head"):
+            has_nan_inf = True
+        if not has_nan_inf:
+            print("--- 权重检查通过。未发现 NaN/Inf 值。 ---")
+        else:
+            # 如果发现坏值，直接抛出异常，因为评估无法继续
+            raise ValueError(f"在检查点 '{self.ckpt_path}' 中发现了 NaN 或 Inf 值。请检查导致训练不稳定的权重文件。")
+        # --- 检查逻辑结束 ---
+        self.vae.eval()
+        self.voxel_encoder.eval()
+        self.connection_head.eval()
+    def _init_dataset(self):
+        self.dataset = VoxelVertexDataset_edge(
+            root_dir=self.dataset_path,
+            base_resolution=self.config['dataset']['base_resolution'],
+            min_resolution=self.config['dataset']['min_resolution'],
+            cache_dir='/gemini/user/private/zhaotianhao/dataset_cache/test_15c_dora',
+            # cache_dir=self.config['dataset']['cache_dir'],
+            renders_dir=self.config['dataset']['renders_dir'],
+            # filter_active_voxels=self.config['dataset']['filter_active_voxels'],
+            filter_active_voxels=False,
+            cache_filter_path=self.config['dataset']['cache_filter_path'],
+            sample_type=self.config['dataset']['sample_type'],
+            active_voxel_res=128,
+            pc_sample_number=819200,
+        )
+        self.dataloader = DataLoader(
+            self.dataset,
+            batch_size=1,
+            shuffle=False,
+            collate_fn=partial(collate_fn_pointnet),
+            num_workers=0,
+            pin_memory=True,
+            # prefetch_factor=4,
+        )
+    def _check_weights_for_nan_inf(self, model: torch.nn.Module, model_name: str) -> bool:
+        """
+        检查模型的所有参数中是否存在 NaN 或 Inf 值。
+        Args:
+            model (torch.nn.Module): 要检查的模型。
+            model_name (str): 模型的名称，用于打印日志。
+        Returns:
+            bool: 如果找到 NaN 或 Inf，则返回 True，否则返回 False。
+        """
+        found_issue = False
+        for name, param in model.named_parameters():
+            if torch.isnan(param.data).any():
+                print(f"[!!!] 严重错误: 在模型 '{model_name}' 的参数 '{name}' 中发现 NaN 值！")
+                found_issue = True
+            if torch.isinf(param.data).any():
+                print(f"[!!!] 严重错误: 在模型 '{model_name}' 的参数 '{name}' 中发现 Inf 值！")
+                found_issue = True
+        return found_issue
+    def _compute_vertex_metrics(self, pred_coords, gt_coords, threshold=1.0):
+        """
+        Vertex precision / recall / F1 with a greedy one-to-one matching that
+        prefers the closest pairs.
+        NOTE(review): a second method with the same name is defined right after
+        this one in the class body; Python keeps the later definition, so this
+        version is never the one that gets called.
+        Coordinates are rounded to integers and de-duplicated. Every ground-truth
+        point proposes its nearest prediction; proposals within ``threshold`` are
+        accepted in order of increasing distance, each prediction being used once.
+        Returns:
+            dict with keys 'recall', 'precision', 'f1', 'matches', 'pred_count'
+            and 'gt_count' (all metrics are 0 when either set is empty).
+        """
+        pred_array = np.unique(pred_coords.round().int().cpu().numpy(), axis=0)
+        gt_array = np.unique(gt_coords.round().int().cpu().numpy(), axis=0)
+        pred_total = len(pred_array)
+        gt_total = len(gt_array)
+        if pred_total == 0 or gt_total == 0:
+            return {
+                'recall': 0.0,
+                'precision': 0.0,
+                'f1': 0.0,
+                'matches': 0,
+                'pred_count': pred_total,
+                'gt_count': gt_total
+            }
+        # The KD-tree is built on the predictions; each ground-truth point
+        # queries its nearest prediction
+        tree = cKDTree(pred_array)
+        dists, pred_idxs = tree.query(gt_array, k=1)
+        # 1. Collect (distance, gt index, prediction index) for every proposal
+        #    within the threshold so they can be ordered by distance
+        possible_matches = []
+        for gt_idx, (dist, pred_idx) in enumerate(zip(dists, pred_idxs)):
+            if dist <= threshold:
+                possible_matches.append((dist, gt_idx, pred_idx))
+        # 2. Sort by increasing distance (greedy strategy)
+        possible_matches.sort(key=lambda x: x[0])
+        matches = 0
+        # Sets tracking the predictions and ground-truth points already used,
+        # which enforces the one-to-one constraint
+        used_pred_indices = set()
+        used_gt_indices = set() # each gt index appears only once here; kept for rigour
+        # 3. Walk through the sorted proposals and assign one-to-one
+        for dist, gt_idx, pred_idx in possible_matches:
+            # Accept only if neither the prediction nor the gt point is taken
+            if pred_idx not in used_pred_indices and gt_idx not in used_gt_indices:
+                matches += 1
+                used_pred_indices.add(pred_idx)
+                used_gt_indices.add(gt_idx)
+        # matches is the true-positive count; it cannot exceed pred_total or gt_total
+        recall = matches / gt_total if gt_total > 0 else 0.0
+        precision = matches / pred_total if pred_total > 0 else 0.0
+        # F1 as the harmonic mean of precision and recall
+        if (precision + recall) == 0:
+            f1 = 0.0
+        else:
+            f1 = 2 * (precision * recall) / (precision + recall)
+        return {
+            'recall': recall,
+            'precision': precision,
+            'f1': f1,
+            'matches': matches,
+            'pred_count': pred_total,
+            'gt_count': gt_total
+        }
+    def _compute_vertex_metrics(self, pred_coords, gt_coords, threshold=1.0):
+        """
+        一个折衷的顶点指标计算方案。
+        它沿用“为每个真实点寻找最近预测点”的逻辑，
+        但通过修正计算方式，确保Precision和F1值不会超过1.0。
+        """
+        # 假设 pred_coords 和 gt_coords 是 PyTorch 张量
+        pred_array = np.unique(pred_coords.round().int().cpu().numpy(), axis=0)
+        gt_array = np.unique(gt_coords.round().int().cpu().numpy(), axis=0)
+        pred_total = len(pred_array)
+        gt_total = len(gt_array)
+        if pred_total == 0 or gt_total == 0:
+            return {
+                'recall': 0.0,
+                'precision': 0.0,
+                'f1': 0.0,
+                'matches': 0,
+                'pred_count': pred_total,
+                'gt_count': gt_total
+            }
+        # 在预测点上构建KD-Tree，为每个真实点查找最近的预测点
+        tree = cKDTree(pred_array)
+        dists, _ = tree.query(gt_array, k=1) # 我们在这里其实不需要 pred 的索引
+        # 1. 计算从 gt 角度出发的匹配数 (True Positives for Recall)
+        #    这和您的第一个函数完全一样。
+        #    这个值代表了“有多少个真实点被成功找到了”。
+        matches_from_gt = np.sum(dists <= threshold)
+        # 2. 计算 Recall (召回率)
+        #    召回率的分母是真实点的总数，所以这里的计算是合理的。
+        recall = matches_from_gt / gt_total if gt_total > 0 else 0.0
+        # 3. 计算 Precision (精确率) - ✅ 这是核心修正点
+        #    精确率的分母是预测点的总数。
+        #    分子（True Positives）不能超过预测点的总数。
+        #    因此，我们取 matches_from_gt 和 pred_total 中的较小值。
+        #    这解决了 Precision > 1 的问题。
+        tp_for_precision = min(matches_from_gt, pred_total)
+        precision = tp_for_precision / pred_total if pred_total > 0 else 0.0
+        # 4. 使用标准的F1分数公式
+        #    您原来的 F1 公式 `2 * matches / (pred + gt)` 是 L1-Score，
+        #    更常用的是基于 Precision 和 Recall 的调和平均数。
+        if (precision + recall) == 0:
+            f1 = 0.0
+        else:
+            f1 = 2 * (precision * recall) / (precision + recall)
+        return {
+            'recall': recall,
+            'precision': precision,
+            'f1': f1,
+            'matches': matches_from_gt, # 仍然报告原始的匹配数，便于观察
+            'pred_count': pred_total,
+            'gt_count': gt_total
+        }
+    def _compute_chamfer_distance(self, p1: torch.Tensor, p2: torch.Tensor, one_sided: bool = False):
+        if len(p1) == 0 or len(p2) == 0:
+            return float('nan')
+        dist_p1_p2 = torch.min(torch.cdist(p1, p2), dim=1)[0].mean()
+        if one_sided:
+            return dist_p1_p2.item()
+        else:
+            dist_p2_p1 = torch.min(torch.cdist(p2, p1), dim=1)[0].mean()
+            return (dist_p1_p2 + dist_p2_p1).item() / 2
+    def visualize_latent_space_pca(self, sample_idx: int):
+        """
+        Encodes a sample, performs PCA on its latent features, and saves a
+        colored PLY file for visualization.
+        The position of each point in the PLY file corresponds to the spatial
+        location in the latent grid.
+        The color of each point represents the first three principal components
+        of its feature vector.
+        """
+        print(f"--- Starting Latent Space PCA Visualization for Sample {sample_idx} ---")
+        self.vae.eval()
+        try:
+            # 1. Get the latent representation for the sample
+            latent = self._get_latent_for_sample(sample_idx)
+        except ValueError as e:
+            print(f"Error: {e}")
+            return
+        latent_coords = latent.coords.detach().cpu().numpy()
+        latent_feats = latent.feats.detach().cpu().numpy()
+        if latent_feats.shape[0] < 3:
+            print(f"Warning: Not enough latent points ({latent_feats.shape[0]}) to perform PCA. Skipping.")
+            return
+        print(f"--> Performing PCA on {latent_feats.shape[0]} latent vectors of dimension {latent_feats.shape[1]}...")
+        # 2. Perform PCA to reduce feature dimensions to 3
+        pca = PCA(n_components=3)
+        pca_features = pca.fit_transform(latent_feats)
+        print(f"    Explained variance ratio by 3 components: {pca.explained_variance_ratio_}")
+        print(f"    Total explained variance: {np.sum(pca.explained_variance_ratio_):.4f}")
+        # 3. Normalize the PCA components to be used as RGB colors [0, 255]
+        # We normalize each component independently to maximize color contrast
+        normalized_colors = np.zeros_like(pca_features)
+        for i in range(3):
+            min_val = pca_features[:, i].min()
+            max_val = pca_features[:, i].max()
+            if max_val - min_val > 1e-6:
+                normalized_colors[:, i] = (pca_features[:, i] - min_val) / (max_val - min_val)
+            else:
+                normalized_colors[:, i] = 0.5 # Handle case of constant value
+        colors_uint8 = (normalized_colors * 255).astype(np.uint8)
+        # 4. Prepare spatial coordinates for the point cloud
+        # latent_coords is (batch_idx, x, y, z), we want the xyz part
+        spatial_coords = latent_coords[:, 1:]
+        # 5. Create and save the colored PLY file
+        try:
+            # Create a Trimesh PointCloud object
+            point_cloud = trimesh.points.PointCloud(vertices=spatial_coords, colors=colors_uint8)
+            # Define the output filename
+            filename = f"sample_{sample_idx}_latent_pca.ply"
+            ply_path = os.path.join(self.output_voxel_dir, filename)
+            # Export the file
+            point_cloud.export(ply_path)
+            print(f"--> Successfully saved PCA visualization to: {ply_path}")
+        except Exception as e:
+            print(f"Error during Trimesh export: {e}")
+            print("Please ensure 'trimesh' is installed correctly.")
+    def _get_latent_for_sample(self, sample_idx: int) -> SparseTensor:
+        """
+        Encodes a single sample and returns its latent representation.
+        """
+        print(f"--> Encoding sample {sample_idx} to get its latent vector...")
+        # Get data for the specified sample
+        batch_data = self.dataset[sample_idx]
+        if batch_data is None:
+            raise ValueError(f"Sample at index {sample_idx} could not be loaded.")
+        # Use the collate function to form a batch
+        batch_data = collate_fn_pointnet([batch_data])
+        with torch.no_grad():
+            active_coords = batch_data['active_voxels_128'].to(self.device)
+            point_cloud = batch_data['point_cloud_128'].to(self.device)
+            active_voxel_feats = self.voxel_encoder(
+                p=point_cloud,
+                sparse_coords=active_coords,
+                res=128,
+                bbox_size=(-0.5, 0.5),
+            )
+            sparse_input = SparseTensor(
+                feats=active_voxel_feats,
+                coords=active_coords.int()
+            )
+            # 2. Encode to get the latent representation
+            latent_128, posterior = self.vae.encode(sparse_input, sample_posterior=True,)
+            print(f"    Latent for sample {sample_idx} obtained. Shape: {latent_128.feats.shape}")
+            return latent_128
+    def evaluate(self, num_samples=None, visualize=False, chamfer_threshold=0.9, threshold=1.):
+        total_samples = len(self.dataset)
+        eval_samples = min(num_samples or total_samples, total_samples)
+        sample_indices = random.sample(range(total_samples), eval_samples) if num_samples else range(total_samples)
+        # sample_indices = range(eval_samples)
+        eval_dataset = Subset(self.dataset, sample_indices)
+        eval_loader = DataLoader(
+            eval_dataset,
+            batch_size=1,
+            shuffle=False,
+            collate_fn=partial(collate_fn_pointnet),
+            num_workers=self.config['training']['num_workers'],
+            pin_memory=True,
+        )
+        per_sample_metrics = {
+            'vertex': {res: [] for res in [128, 256, 512]},
+            'edge': {res: [] for res in [128, 256, 512]},
+            'sample_names': []
+        }
+        avg_metrics = {
+            'vertex': {res: defaultdict(list) for res in [128, 256, 512]},
+            'edge': {res: defaultdict(list) for res in [128, 256, 512]},
+        }
+        self.vae.eval()
+        for batch_idx, batch_data in enumerate(tqdm(eval_loader, desc="Evaluating")):
+            if batch_data is None:
+                continue
+            sample_idx = sample_indices[batch_idx]
+            sample_name = f'sample_{sample_idx}'
+            per_sample_metrics['sample_names'].append(sample_name)
+            # batch_save_path = f"/gemini/user/private/zhaotianhao/checkpoints/output_slat_flow_matching_active/8w_128to256_head_rope/215000_sample_active_vis_42seed_1000complex/gt_data_batch_{batch_idx}.pt"
+            # if not os.path.exists(batch_save_path):
+            #     print(f"Warning: Saved batch file not found: {batch_save_path}")
+            #     continue
+            # batch_data = torch.load(batch_save_path, map_location=self.device)
+            with torch.no_grad():
+                # 1. Get input data
+                combined_voxels_512 = batch_data['combined_voxels_512'].to(self.device)
+                combined_voxel_labels_512 = batch_data['combined_voxel_labels_512'].to(self.device)
+                gt_combined_endpoints_512 = batch_data['gt_combined_endpoints_512'].to(self.device)
+                gt_combined_errors_512 = batch_data['gt_combined_errors_512'].to(self.device)
+                edge_mask = (combined_voxel_labels_512 == 1)
+                gt_edge_endpoints_512 = gt_combined_endpoints_512[edge_mask].to(self.device)
+                gt_edge_voxels_512 = combined_voxels_512[edge_mask].to(self.device)
+                p1 = gt_edge_endpoints_512[:, 1:4].float()
+                p2 = gt_edge_endpoints_512[:, 4:7].float()
+                mask = ( (p1[:,0] < p2[:,0]) |
+                        ((p1[:,0] == p2[:,0]) & (p1[:,1] < p2[:,1])) |
+                        ((p1[:,0] == p2[:,0]) & (p1[:,1] == p2[:,1]) & (p1[:,2] <= p2[:,2])) )
+                pA = torch.where(mask[:, None], p1, p2)  # smaller one
+                pB = torch.where(mask[:, None], p2, p1)  # larger one
+                d = pB - pA
+                dir_gt = F.normalize(d, dim=-1, eps=1e-6)
+                gt_vertex_voxels_512 = batch_data['gt_vertex_voxels_512'].to(self.device).int()
+                vtx_128 = downsample_voxels(gt_vertex_voxels_512, input_resolution=512, output_resolution=128)
+                vtx_256 = downsample_voxels(gt_vertex_voxels_512, input_resolution=512, output_resolution=256)
+                edge_128 = downsample_voxels(combined_voxels_512, input_resolution=512, output_resolution=128)
+                edge_256 = downsample_voxels(combined_voxels_512, input_resolution=512, output_resolution=256)
+                edge_512 = combined_voxels_512
+                gt_edge_voxels_list = [
+                    edge_128,
+                    edge_256,
+                    edge_512,
+                ]
+                active_coords = batch_data['active_voxels_128'].to(self.device)
+                point_cloud = batch_data['point_cloud_128'].to(self.device)
+                active_voxel_feats = self.voxel_encoder(
+                    p=point_cloud,
+                    sparse_coords=active_coords,
+                    res=128,
+                    bbox_size=(-0.5, 0.5),
+                )
+                sparse_input = SparseTensor(
+                    feats=active_voxel_feats,
+                    coords=active_coords.int()
+                )
+                latent_128, posterior = self.vae.encode(sparse_input)
+                # load_path = f'/gemini/user/private/zhaotianhao/checkpoints/output_slat_flow_matching_active/8w_128to256_head_rope/215000_sample_active_vis_42seed_1000complex/sample_latent_{batch_idx}.pt'
+                # latent_128 = torch.load(load_path, map_location=self.device)
+                print('latent_128.feats.mean()', latent_128.feats.mean(), 'latent_128.feats.std()', latent_128.feats.std())
+                print('posterior.mean', posterior.mean.mean(), 'posterior.std', posterior.std.mean(), 'posterior.var', posterior.var.mean())
+                print('latent_128.coords.shape', latent_128.coords.shape)
+                # latent_128 = torch.load(f"/root/Trisf/output_slat_flow_matching/ckpts/1100_chair_sample/110000step_sample/sample_results_samples_{batch_idx}.pt", map_location=self.device)
+                latent_128 = SparseTensor(
+                    coords=latent_128.coords,
+                    feats=latent_128.feats + 0. * torch.randn_like(latent_128.feats),
+                )
+                # self.output_voxel_dir = os.path.dirname(load_path)
+                # self.output_obj_dir = os.path.dirname(load_path)
+                # 7. Decoding with separate vertex and edge processing
+                decoded_results = self.vae.decode(
+                    latent_128,
+                    gt_vertex_voxels_list=[],
+                    gt_edge_voxels_list=[],
+                    training=False,
+                    inference_threshold=0.5,
+                    vis_last_layer=False,
+                )
+                error = 0 # decoded_results[-1]['edge']['predicted_offset_feats']
+                if self.config['model'].get('pred_direction', False):
+                    pred_dir = decoded_results[-1]['edge']['predicted_direction_feats']
+                    zero_mask = (pred_dir == 0).all(dim=1)  # [N]，True 表示这一行全为0
+                    num_zeros = zero_mask.sum().item()
+                    print("Number of zero vectors:", num_zeros)
+                    pred_edge_coords_3d = decoded_results[-1]['edge']['coords']
+                    print('pred_edge_coords_3d.shape', pred_edge_coords_3d.shape)
+                    print('pred_dir.shape', pred_dir.shape)
+                    if pred_edge_coords_3d.shape[-1] == 4:
+                        pred_edge_coords_3d = pred_edge_coords_3d[:, 1:]
+                    save_pth = os.path.join(self.output_voxel_dir, f"{sample_name}_direction.ply")
+                    visualize_colored_points_ply(pred_edge_coords_3d, pred_dir, save_pth)
+                    save_pth = os.path.join(self.output_voxel_dir, f"{sample_name}_direction_gt.ply")
+                    visualize_colored_points_ply((gt_edge_voxels_512[:, 1:]), dir_gt, save_pth)
+                pred_vtx_coords_3d = decoded_results[-1]['vertex']['coords']
+                pred_edge_coords_3d = decoded_results[-1]['edge']['coords']
+                gt_vertex_voxels_512 = batch_data['gt_vertex_voxels_512'][:, 1:].to(self.device)
+                gt_edge_voxels_512 = batch_data['gt_edge_voxels_512'][:, 1:].to(self.device)
+                # Calculate metrics and save results
+                matches, match_rate, pred_total, gt_total = compute_vertex_matching(pred_vtx_coords_3d, gt_vertex_voxels_512, threshold=threshold,)
+                print(f"\n----- Resolution {512} vtx -----")
+                print(f"Pred Vertices: {pred_total} | GT Vertices: {gt_total}")
+                print(f"Matched Vertices: {matches} | Match Rate: {match_rate:.2%}")
+                self._save_voxel_ply(pred_vtx_coords_3d / 512., torch.zeros(len(pred_vtx_coords_3d)), f"{sample_name}_pred_vtx")
+                self._save_voxel_ply((pred_edge_coords_3d) / 512, torch.zeros(len(pred_edge_coords_3d)), f"{sample_name}_pred_edge")
+                self._save_voxel_ply(gt_vertex_voxels_512 / 512, torch.zeros(len(gt_vertex_voxels_512)), f"{sample_name}_gt_vertex")
+                self._save_voxel_ply((combined_voxels_512[:, 1:]) / 512., torch.zeros(len(gt_combined_errors_512)), f"{sample_name}_gt_edge")
+                # Calculate vertex-specific metrics
+                matches, match_rate, pred_total, gt_total = compute_vertex_matching(pred_edge_coords_3d, combined_voxels_512[:, 1:], threshold=threshold,)
+                print(f"\n----- Resolution {512} edge -----")
+                print('pred_edge_coords_3d.shape', pred_edge_coords_3d.shape)
+                print('gt_edge_voxels_512.shape', gt_edge_voxels_512.shape)
+                print(f"Pred Vertices: {pred_total} | GT Vertices: {gt_total}")
+                print(f"Matched Vertices: {matches} | Match Rate: {match_rate:.2%}")
+                pred_vertex_coords_np = np.round(pred_vtx_coords_3d.cpu().numpy()).astype(int)
+                pred_edges = []
+                gt_vertex_coords_np = np.round(gt_vertex_voxels_512.cpu().numpy()).astype(int)
+                if visualize:
+                    if pred_vtx_coords_3d.shape[-1] == 4:
+                        pred_vtx_coords_float = pred_vtx_coords_3d[:, 1:].float()
+                    else:
+                        pred_vtx_coords_float = pred_vtx_coords_3d.float()
+                    pred_vtx_feats = decoded_results[-1]['vertex']['feats']
+                # ==========================================
+                # Link Prediction & Mesh Generation
+                # ==========================================
+                print("Predicting connectivity...")
+                # 1. 预测边
+                # 注意：K_neighbors 的设置。如果是物体，64 足够了。
+                # 如果点非常稀疏，可能需要更大。
+                pred_edges = predict_mesh_connectivity(
+                    connection_head=self.connection_head, # 或者是 self.connection_head，取决于你在哪里定义的
+                    vtx_feats=pred_vtx_feats,
+                    vtx_coords=pred_vtx_coords_float,
+                    batch_size=4096,
+                    threshold=0.5,
+                    k_neighbors=None,
+                    device=self.device
+                )
+                print(f"Predicted {len(pred_edges)} edges.")
+                # 2. 构建三角形
+                num_verts = pred_vtx_coords_float.shape[0]
+                pred_faces = build_triangles_from_edges(pred_edges, num_verts)
+                print(f"Constructed {len(pred_faces)} triangles.")
+                # 3. 保存 OBJ
+                import trimesh
+                # 坐标归一化/还原 (根据你的需求，这里假设你是 0-512 的体素坐标)
+                # 如果想保存为归一化坐标：
+                mesh_verts = pred_vtx_coords_float.cpu().numpy() / 512.0
+                # 如果有 error offset，记得加上！
+                # 你之前的代码好像没有对 vertex 加 offset，只对 edge 加了
+                # 如果 vertex 也有 offset (如 dual contouring)，在这里加上
+                # 移动到中心 (可选)
+                mesh_verts = mesh_verts - 0.5
+                mesh = trimesh.Trimesh(vertices=mesh_verts, faces=pred_faces)
+                # 过滤孤立点 (可选)
+                # mesh.remove_unreferenced_vertices()
+                output_obj_path = os.path.join(self.output_voxel_dir, f"{sample_name}_recon.obj")
+                mesh.export(output_obj_path)
+                print(f"Saved mesh to {output_obj_path}")
+                # 保存边线 (用于 Debug)
+                # 有时候三角形很难形成，只看边也很有用
+                edges_path = os.path.join(self.output_voxel_dir, f"{sample_name}_edges.ply")
+                # self._visualize_vertices(pred_edge_coords_np, gt_edge_coords_np, f"{sample_name}_edge_comparison")
+                # Process results at different resolutions
+                for i, res in enumerate([128, 256, 512]):
+                    if i >= len(decoded_results):
+                        continue
+                    gt_key = f'gt_vertex_voxels_{res}'
+                    if gt_key not in batch_data:
+                        continue
+                    if i == 0:
+                        pred_coords_res = decoded_results[i]['vtx_sp'].coords[:, 1:].float()
+                        gt_coords_res = batch_data[gt_key][:, 1:].float().to(self.device)
+                    else:
+                        pred_coords_res = decoded_results[i]['vertex']['coords'].float()
+                        gt_coords_res = batch_data[gt_key][:, 1:].float().to(self.device)
+                    v_metrics = self._compute_vertex_metrics(pred_coords_res, gt_coords_res, threshold=threshold)
+                    per_sample_metrics['vertex'][res].append({
+                        'recall': v_metrics['recall'],
+                        'precision': v_metrics['precision'],
+                        'f1': v_metrics['f1'],
+                        'num_pred': len(pred_coords_res),
+                        'num_gt': len(gt_coords_res)
+                    })
+                    avg_metrics['vertex'][res]['recall'].append(v_metrics['recall'])
+                    avg_metrics['vertex'][res]['precision'].append(v_metrics['precision'])
+                    avg_metrics['vertex'][res]['f1'].append(v_metrics['f1'])
+                    gt_edge_key = f'gt_edge_voxels_{res}'
+                    if gt_edge_key not in batch_data:
+                        continue
+                    if i == 0:
+                        pred_edge_coords_res = decoded_results[i]['edge_sp'].coords[:, 1:].float()
+                        # gt_edge_coords_res = batch_data[gt_edge_key][:, 1:].float().to(self.device)
+                        idx = i
+                        gt_edge_coords_res = gt_edge_voxels_list[idx][:, 1:].float().to(self.device)
+                    elif i == 1:
+                        idx = i
+                        #################################
+                        # pred_edge_coords_res = decoded_results[i]['edge']['coords'].float() - error / 2. + 0.5
+                        # # gt_edge_coords_res = batch_data[gt_edge_key][:, 1:].float().to(self.device)
+                        # gt_edge_coords_res = gt_edge_voxels_list[idx][:, 1:].float().to(self.device) - gt_combined_errors_512[:, 1:].to(self.device) + 0.5
+                        pred_edge_coords_res = decoded_results[i]['edge']['coords'].float()
+                        gt_edge_coords_res = gt_edge_voxels_list[idx][:, 1:].float().to(self.device)
+                        # self._save_voxel_ply(gt_edge_voxels_list[idx][:, 1:].float().to(self.device) / (128*2**i), torch.zeros(len(gt_edge_coords_res)), f"{sample_name}_gt_edge_{128*2**i}res_wooffset")
+                        # self._save_voxel_ply(decoded_results[i]['edge']['coords'].float() / (128*2**i), torch.zeros(len(pred_edge_coords_res)), f"{sample_name}_pred_edge_{128*2**i}res_wooffset")
+                    else:
+                        idx = i
+                        pred_edge_coords_res = decoded_results[i]['edge']['coords'].float()
+                        # gt_edge_coords_res = batch_data[gt_edge_key][:, 1:].float().to(self.device)
+                        gt_edge_coords_res = gt_edge_voxels_list[idx][:, 1:].float().to(self.device)
+                    # self._save_voxel_ply(gt_edge_coords_res / (128*2**i), torch.zeros(len(gt_edge_coords_res)), f"{sample_name}_gt_edge_{128*2**i}res")
+                    # self._save_voxel_ply(pred_edge_coords_res / (128*2**i), torch.zeros(len(pred_edge_coords_res)), f"{sample_name}_pred_edge_{128*2**i}res")
+                    e_metrics = self._compute_vertex_metrics(pred_edge_coords_res, gt_edge_coords_res, threshold=threshold)
+                    per_sample_metrics['edge'][res].append({
+                        'recall': e_metrics['recall'],
+                        'precision': e_metrics['precision'],
+                        'f1': e_metrics['f1'],
+                        'num_pred': len(pred_edge_coords_res),
+                        'num_gt': len(gt_edge_coords_res)
+                    })
+                    avg_metrics['edge'][res]['recall'].append(e_metrics['recall'])
+                    avg_metrics['edge'][res]['precision'].append(e_metrics['precision'])
+                    avg_metrics['edge'][res]['f1'].append(e_metrics['f1'])
+        avg_metrics_processed = {}
+        for category, res_dict in avg_metrics.items():
+            avg_metrics_processed[category] = {}
+            for resolution, metric_dict in res_dict.items():
+                avg_metrics_processed[category][resolution] = {
+                    metric_name: np.mean(values) if values else float('nan')
+                    for metric_name, values in metric_dict.items()
+                }
+        result_data = {
+            'config': self.config,
+            'checkpoint': self.ckpt_path,
+            'dataset': self.dataset_path,
+            'num_samples': eval_samples,
+            'per_sample_metrics': per_sample_metrics,
+            'avg_metrics': avg_metrics_processed
+        }
+        results_file_path = os.path.join(self.result_dir, f"evaluation_results_epoch{self.epoch}.yaml")
+        with open(results_file_path, 'w') as f:
+            yaml.dump(result_data, f, default_flow_style=False)
+        return result_data
+    def _generate_line_voxels(
+        self,
+        p1: torch.Tensor,
+        p2: torch.Tensor
+    ) -> Tuple[
+        List[Tuple[int, int, int]],
+        List[Tuple[torch.Tensor, torch.Tensor]],
+        List[np.ndarray]
+    ]:
+        """
+        Improved version using better sampling strategy
+        """
+        p1_np = p1 #.cpu().numpy()
+        p2_np = p2 #.cpu().numpy()
+        voxel_dict = OrderedDict()
+        # Use proper 3D line voxelization algorithm
+        def bresenham_3d(p1, p2):
+            """3D Bresenham's line algorithm"""
+            x1, y1, z1 = np.round(p1).astype(int)
+            x2, y2, z2 = np.round(p2).astype(int)
+            points = []
+            dx = abs(x2 - x1)
+            dy = abs(y2 - y1)
+            dz = abs(z2 - z1)
+            xs = 1 if x2 > x1 else -1
+            ys = 1 if y2 > y1 else -1
+            zs = 1 if z2 > z1 else -1
+            # Driving axis is X
+            if dx >= dy and dx >= dz:
+                err_1 = 2 * dy - dx
+                err_2 = 2 * dz - dx
+                for i in range(dx + 1):
+                    points.append((x1, y1, z1))
+                    if err_1 > 0:
+                        y1 += ys
+                        err_1 -= 2 * dx
+                    if err_2 > 0:
+                        z1 += zs
+                        err_2 -= 2 * dx
+                    err_1 += 2 * dy
+                    err_2 += 2 * dz
+                    x1 += xs
+            # Driving axis is Y
+            elif dy >= dx and dy >= dz:
+                err_1 = 2 * dx - dy
+                err_2 = 2 * dz - dy
+                for i in range(dy + 1):
+                    points.append((x1, y1, z1))
+                    if err_1 > 0:
+                        x1 += xs
+                        err_1 -= 2 * dy
+                    if err_2 > 0:
+                        z1 += zs
+                        err_2 -= 2 * dy
+                    err_1 += 2 * dx
+                    err_2 += 2 * dz
+                    y1 += ys
+            # Driving axis is Z
+            else:
+                err_1 = 2 * dx - dz
+                err_2 = 2 * dy - dz
+                for i in range(dz + 1):
+                    points.append((x1, y1, z1))
+                    if err_1 > 0:
+                        x1 += xs
+                        err_1 -= 2 * dz
+                    if err_2 > 0:
+                        y1 += ys
+                        err_2 -= 2 * dz
+                    err_1 += 2 * dx
+                    err_2 += 2 * dy
+                    z1 += zs
+            return points
+        # Get all voxels using Bresenham algorithm
+        voxel_coords = bresenham_3d(p1_np, p2_np)
+        # Add all voxels to dictionary
+        for coord in voxel_coords:
+            voxel_dict[tuple(coord)] = (p1, p2)
+        voxel_coords = list(voxel_dict.keys())
+        endpoint_pairs = list(voxel_dict.values())
+        # --- compute error vectors ---
+        error_vectors = []
+        diff = p2_np - p1_np
+        d_norm_sq = np.dot(diff, diff)
+        for v in voxel_coords:
+            v_center = np.array(v, dtype=float) + 0.5
+            if d_norm_sq == 0:  # degenerate line
+                closest = p1_np
+            else:
+                t = np.dot(v_center - p1_np, diff) / d_norm_sq
+                t = np.clip(t, 0.0, 1.0)
+                closest = p1_np + t * diff
+            error_vectors.append(v_center - closest)
+        return voxel_coords, endpoint_pairs, error_vectors
+# Usage example
+def set_seed(seed: int):
+    random.seed(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+    if torch.cuda.is_available():
+        torch.cuda.manual_seed(seed)
+        torch.cuda.manual_seed_all(seed)
+    torch.backends.cudnn.deterministic = True
+    torch.backends.cudnn.benchmark = False
+def evaluate_checkpoint(ckpt_path, dataset_path, eval_dir):
+    set_seed(42)
+    tester = Tester(ckpt_path=ckpt_path, dataset_path=dataset_path)
+    result_data = tester.evaluate(num_samples=NUM_SAMPLES, visualize=VISUALIZE, chamfer_threshold=CHAMFER_EDGE_THRESHOLD, threshold=THRESHOLD)
+    # 生成文件名
+    epoch_str = os.path.basename(ckpt_path).split('_')[1].split('.')[0]
+    dataset_name = os.path.basename(os.path.normpath(dataset_path))
+    # 保存简版报告(TXT)
+    summary_path = os.path.join(eval_dir, f"epoch{epoch_str}_{dataset_name}_summary_threshold{THRESHOLD}_one2one.txt")
+    with open(summary_path, 'w') as f:
+        # 头部信息
+        f.write(f"Checkpoint: {os.path.basename(ckpt_path)}\n")
+        f.write(f"Dataset: {dataset_name}\n")
+        f.write(f"Evaluation Samples: {result_data['num_samples']}\n\n")
+        # 平均指标
+        f.write("=== Average Metrics ===\n")
+        for category, data in result_data['avg_metrics'].items():
+            if isinstance(data, dict):  # 处理多分辨率情况
+                f.write(f"\n{category.upper()}:\n")
+                for res, metrics in data.items():
+                    f.write(f"  Resolution {res}:\n")
+                    for k, v in metrics.items():
+                        # 确保值是数字类型后再格式化
+                        if isinstance(v, (int, float)):
+                            f.write(f"    {str(k).ljust(15)}: {v:.4f}\n")
+                        else:
+                            f.write(f"    {str(k).ljust(15)}: {str(v)}\n")
+            else:  # 处理非多分辨率情况
+                f.write(f"\n{category.upper()}:\n")
+                for k, v in data.items():
+                    if isinstance(v, (int, float)):
+                        f.write(f"  {str(k).ljust(15)}: {v:.4f}\n")
+                    else:
+                        f.write(f"  {str(k).ljust(15)}: {str(v)}\n")
+        # 样本级详细统计
+        f.write("\n\n=== Detailed Per-Sample Metrics ===\n")
+        for name, vertex_metrics, edge_metrics in zip(
+            result_data['per_sample_metrics']['sample_names'],
+            zip(*[result_data['per_sample_metrics']['vertex'][res] for res in [128, 256, 512]]),
+            zip(*[result_data['per_sample_metrics']['edge'][res] for res in [128, 256, 512]])
+        ):
+            # 样本标题
+            f.write(f"\n◆ Sample: {name}\n")
+            # 顶点指标
+            f.write(f"[Vertex Prediction]\n")
+            f.write(f"  {'Resolution'.ljust(10)} {'Recall'.ljust(8)} {'Precision'.ljust(8)} {'F1'.ljust(8)} {'Pred/Gt'.ljust(10)}\n")
+            for res, metrics in zip([128, 256, 512], vertex_metrics):
+                f.write(f"  {str(res).ljust(10)} "
+                      f"{metrics['recall']:.4f}    "
+                      f"{metrics['precision']:.4f}    "
+                      f"{metrics['f1']:.4f}    "
+                      f"{metrics['num_pred']}/{metrics['num_gt']}\n")
+            # Edge指标
+            f.write(f"[Edge Prediction]\n")
+            f.write(f"  {'Resolution'.ljust(10)} {'Recall'.ljust(8)} {'Precision'.ljust(8)} {'F1'.ljust(8)} {'Pred/Gt'.ljust(10)}\n")
+            for res, metrics in zip([128, 256, 512], edge_metrics):
+                f.write(f"  {str(res).ljust(10)} "
+                      f"{metrics['recall']:.4f}    "
+                      f"{metrics['precision']:.4f}    "
+                      f"{metrics['f1']:.4f}    "
+                      f"{metrics['num_pred']}/{metrics['num_gt']}\n")
+            f.write("-"*60 + "\n")
+    print(f"Saved summary to: {summary_path}")
+    return result_data
+# Script entry point: evaluates one checkpoint, or every checkpoint in the same
+# directory whose epoch lies in [EPOCH_START, EPOCH_END].
+if __name__ == '__main__':
+    # Everything below runs under bfloat16 autocast.
+    with torch.cuda.amp.autocast(dtype=torch.bfloat16):
+        evaluate_all_checkpoints = True  # set to True to enable epoch-range filtering
+        EPOCH_START = 0
+        EPOCH_END = 12
+        # The upper-case names below are module-level globals read by evaluate_checkpoint().
+        CHAMFER_EDGE_THRESHOLD=0.5
+        NUM_SAMPLES=20
+        VISUALIZE=True
+        THRESHOLD=1.5
+        VISUAL_FIELD=False
+        # NOTE: only the last assignment of ckpt_path / dataset_path takes effect;
+        # the earlier ones are overwritten immediately.
+        ckpt_path = '/gemini/user/private/zhaotianhao/checkpoints/vae/train_9w_200_2000face/shapenet_bs2_128to512_wolabel_dir_sorted_dora_small/checkpoint_epoch0_batch10433_loss1.2657.pt'
+        ckpt_path = '/gemini/user/private/zhaotianhao/checkpoints/vae/unique_files_glb_under6000face_2degree_30ratio_0.01/shapenet_bs2_128to512_wolabel_dir_sorted_dora_small/checkpoint_epoch0_batch2000_loss0.3315.pt'
+        dataset_path = '/gemini/user/private/zhaotianhao/data/MERGED_DATASET_count_200_2000_100000/test'
+        dataset_path = '/gemini/user/private/zhaotianhao/data/why_filter_unquantized'
+        # dataset_path = '/gemini/user/private/zhaotianhao/data/trellis500k_compress_glb'
+        dataset_path = '/gemini/user/private/zhaotianhao/data/unique_files_glb_under6000face_2degree_30ratio_0.01'
+        # RENDERS_DIR is only non-empty for one specific dataset path.
+        if dataset_path == '/HOME/paratera_xy/pxy1054/HDD_POOL/Trisf/data/mesh/objaverse_200_2000':
+            RENDERS_DIR = '/HOME/paratera_xy/pxy1054/HDD_POOL/Trisf/data/mesh_render_img/objaverse_200_2000/renders_cond'
+        else:
+            RENDERS_DIR = ''
+        # Summaries are written to an "evaluate" folder next to the checkpoint.
+        ckpt_dir = os.path.dirname(ckpt_path)
+        eval_dir = os.path.join(ckpt_dir, "evaluate")
+        os.makedirs(eval_dir, exist_ok=True)
+        # Disabled debug path: latent-space PCA visualization per sample.
+        if False:
+            for i in range(NUM_SAMPLES):
+                print("--- Starting Latent Space PCA Visualization ---")
+                tester = Tester(ckpt_path=ckpt_path, dataset_path=dataset_path)
+                tester.visualize_latent_space_pca(sample_idx=i)
+                print("--- PCA Visualization Finished ---")
+        if not evaluate_all_checkpoints:
+            evaluate_checkpoint(ckpt_path, dataset_path, eval_dir)
+        else:
+            # Sorted by file name, i.e. lexicographically rather than by epoch number.
+            pt_files = sorted([f for f in os.listdir(ckpt_dir) if f.endswith('.pt')])
+            filtered_pt_files = []
+            for f in pt_files:
+                try:
+                    # The second '_'-separated field is expected to be 'epoch<N>'.
+                    parts = f.split('_')
+                    epoch_str = parts[1].replace('epoch', '')
+                    epoch = int(epoch_str)
+                    if EPOCH_START <= epoch <= EPOCH_END:
+                        filtered_pt_files.append(f)
+                except Exception as e:
+                    # Files that do not follow the naming scheme are reported and skipped.
+                    print(f"Warning: Could not parse epoch from {f}: {e}")
+                    continue
+            for pt_file in filtered_pt_files:
+                full_ckpt_path = os.path.join(ckpt_dir, pt_file)
+                evaluate_checkpoint(full_ckpt_path, dataset_path, eval_dir)

train_slat_flow_128to1024_pointnet.py ADDED Viewed

	@@ -0,0 +1,484 @@

+import os
+# os.environ['ATTN_BACKEND'] = 'xformers' # xformers is generally compatible with DDP
+import numpy as np
+import torch
+import yaml
+from torch.utils.data import DataLoader, DistributedSampler
+from functools import partial
+import torch.nn.functional as F
+from torch.optim import AdamW
+from torch.amp import GradScaler, autocast
+from typing import *
+from transformers import CLIPTextModel, AutoTokenizer, CLIPTextConfig
+import torch.distributed as dist
+from torch.nn.parallel import DistributedDataParallel as DDP
+# --- Updated Imports based on VAE script ---
+from dataset_triposf_head import VoxelVertexDataset_edge, collate_fn_pointnet
+from triposf.models.triposf_vae.VoxelFeatureVAE_edge_woself_128to1024_decoder_head import VoxelVAE
+from vertex_encoder import VoxelFeatureEncoder_active_pointnet, ConnectionHead
+from triposf.modules.sparse.basic import SparseTensor
+from trellis.models.structured_latent_flow import SLatFlowModel
+from trellis.trainers.flow_matching.sparse_flow_matching_alone import SparseFlowMatchingTrainer
+from safetensors.torch import load_file
+import torch.multiprocessing as mp
+import open3d as o3d
+from PIL import Image
+import torch.nn as nn
+from triposf.modules.utils import DiagonalGaussianDistribution
+import torchvision.transforms as transforms
+import re
+# --- Distributed Setup Functions ---
+def setup_distributed(backend="nccl"):
+    """Initializes the distributed environment."""
+    if not dist.is_initialized():
+        rank = int(os.environ["RANK"])
+        world_size = int(os.environ["WORLD_SIZE"])
+        local_rank = int(os.environ["LOCAL_RANK"])
+        torch.cuda.set_device(local_rank)
+        dist.init_process_group(backend=backend, rank=rank, world_size=world_size)
+    return int(os.environ["RANK"]), int(os.environ["LOCAL_RANK"]), int(os.environ["WORLD_SIZE"])
+def cleanup_distributed():
+    dist.destroy_process_group()
+# --- Modified Trainer Class ---
+class SLatFlowMatchingTrainer(SparseFlowMatchingTrainer):
+    def __init__(self, *args, rank: int, local_rank: int, world_size: int, **kwargs):
+        """Set up the distributed flow-matching trainer.
+
+        Args:
+            *args, **kwargs: Forwarded to ``SparseFlowMatchingTrainer.__init__``.
+                ``kwargs`` must contain ``cfg``, the configuration dictionary.
+            rank: Global rank of this process; rank 0 is treated as master.
+            local_rank: Index used to select the ``cuda:<local_rank>`` device.
+            world_size: Stored on the instance as ``self.world_size``.
+
+        Raises:
+            ValueError: If ``cfg`` is missing from ``kwargs`` (or is ``None``).
+        """
+        super().__init__(*args, **kwargs)
+        # NOTE(review): 'cfg' is popped only after kwargs (still containing it)
+        # were passed to the parent constructor -- confirm the parent accepts it.
+        self.cfg = kwargs.pop('cfg', None)
+        if self.cfg is None:
+            raise ValueError("Configuration dictionary 'cfg' must be provided.")
+        # --- Distributed-related attributes ---
+        self.rank = rank
+        self.local_rank = local_rank
+        self.world_size = world_size
+        self.device = torch.device(f"cuda:{self.local_rank}")
+        self.is_master = (self.rank == 0)
+        self.i_save = self.cfg['training']['save_every']
+        self.save_dir = self.cfg['training']['output_dir']
+        # Hard-coded settings (not read from cfg).
+        self.resolution = 128
+        self.condition_type = 'image'
+        self.is_cond = False
+        self.img_res = 518
+        # Only the master rank creates the output directory.
+        if self.is_master:
+            os.makedirs(self.save_dir, exist_ok=True)
+            print(f"Checkpoints and logs will be saved to: {self.save_dir}")
+        # Initialize components and set up for DDP
+        self._init_components(
+            clip_model_path=self.cfg['training'].get('clip_model_path', None),
+            dinov2_model_path=self.cfg['training'].get('dinov2_model_path', None),
+            vae_path=self.cfg['training']['vae_path'],
+        )
+        self._setup_ddp()
+        self.denoiser_checkpoint_path = self.cfg['training'].get('denoiser_checkpoint_path', None)
+        # Only the denoiser's parameters are handed to the optimizer.
+        # presumably self.denoiser is created by the parent constructor -- verify.
+        trainable_params = list(self.denoiser.parameters())
+        self.optimizer = AdamW(trainable_params, lr=self.cfg['training'].get('lr', 0.0001), weight_decay=0.0)
+        self.scaler = GradScaler()
+        if self.is_master:
+            print("Using Automatic Mixed Precision (AMP) with GradScaler.")
+    def _init_components(self,
+            clip_model_path=None,
+            dinov2_model_path=None,
+            vae_path=None,
+        ):
+        """
+        Initializes VAE, VoxelEncoder (PointNet), and condition models.
+        """
+        def load_file_func(path, device='cpu'):
+            return torch.load(path, map_location=device)
+        def _load_and_broadcast(model, load_fn=None, path=None, strict=True):
+            if self.is_master:
+                try:
+                    state = load_fn(path) if load_fn else model.state_dict()
+                except Exception as e:
+                    raise RuntimeError(f"Failed to load weights from {path}: {e}")
+            else:
+                state = None
+            dist.barrier()
+            state_b = [state] if self.is_master else [None]
+            dist.broadcast_object_list(state_b, src=0)
+            try:
+                # Handle potential key mismatches (e.g. 'module.' prefix)
+                model.load_state_dict(state_b[0], strict=strict)
+            except Exception as e:
+                if self.is_master: print(f"Strict loading failed for {model.__class__.__name__}, trying non-strict: {e}")
+                model.load_state_dict(state_b[0], strict=False)
+        # ------------------------- Voxel Encoder (PointNet) -------------------------
+        # Matching the VAE script configuration
+        self.voxel_encoder = VoxelFeatureEncoder_active_pointnet(
+            in_channels=15,
+            hidden_dim=256,
+            out_channels=1024,
+            scatter_type='mean',
+            n_blocks=5,
+            resolution=128,
+            add_label=False,
+        ).to(self.device)
+        # ------------------------- VAE -------------------------
+        self.vae = VoxelVAE(
+            in_channels=self.cfg['model']['in_channels'],
+            latent_dim=self.cfg['model']['latent_dim'],
+            encoder_blocks=self.cfg['model']['encoder_blocks'],
+            decoder_blocks_vtx=self.cfg['model']['decoder_blocks_vtx'],
+            decoder_blocks_edge=self.cfg['model']['decoder_blocks_edge'],
+            num_heads=8,
+            num_head_channels=64,
+            mlp_ratio=4.0,
+            attn_mode="swin",
+            window_size=8,
+            pe_mode="ape",
+            use_fp16=False,
+            use_checkpoint=True,
+            qk_rms_norm=False,
+            using_subdivide=True,
+            using_attn=self.cfg['model']['using_attn'],
+            attn_first=self.cfg['model'].get('attn_first', True),
+            pred_direction=self.cfg['model'].get('pred_direction', False),
+        ).to(self.device)
+        # ------------------------- Conditioning -------------------------
+        if self.condition_type == 'text':
+            self.tokenizer = AutoTokenizer.from_pretrained(clip_model_path)
+            if self.is_master:
+                self.condition_model = CLIPTextModel.from_pretrained(clip_model_path)
+            else:
+                config = CLIPTextConfig.from_pretrained(clip_model_path)
+                self.condition_model = CLIPTextModel(config)
+            _load_and_broadcast(self.condition_model)
+        elif self.condition_type == 'image':
+            if self.is_master:
+                print("Initializing for IMAGE conditioning (DINOv2).")
+            # Update paths as per your environment
+            local_repo_path = "/root/Trisf/dinov2_resources/dinov2-main"
+            weights_path = "/root/Trisf/dinov2_resources/dinov2_vitl14_reg4_pretrain.pth"
+            dinov2_model = torch.hub.load(
+                repo_or_dir=local_repo_path,
+                model='dinov2_vitl14_reg',
+                source='local',
+                pretrained=False
+            )
+            self.condition_model = dinov2_model
+            _load_and_broadcast(self.condition_model, load_fn=torch.load, path=weights_path)
+            self.image_cond_model_transform = transforms.Compose([
+                transforms.ToTensor(),
+                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+            ])
+        else:
+            raise ValueError(f"Unsupported condition type: {self.condition_type}")
+        self.condition_model.to(self.device).eval()
+        for p in self.condition_model.parameters(): p.requires_grad = False
+        # ------------------------- Load VAE/Encoder Weights -------------------------
+        # Load weights corresponding to the logic in VAE script's `load_pretrained_woself`
+        # Assuming checkpoint contains 'vae' and 'voxel_encoder' keys
+        _load_and_broadcast(self.vae,
+                            load_fn=lambda p: load_file_func(p)['vae'],
+                            path=vae_path)
+        _load_and_broadcast(self.voxel_encoder,
+                            load_fn=lambda p: load_file_func(p)['voxel_encoder'],
+                            path=vae_path)
+        self.vae.eval()
+        self.voxel_encoder.eval()
+        for p in self.vae.parameters(): p.requires_grad = False
+        for p in self.voxel_encoder.parameters(): p.requires_grad = False
+    def _load_denoiser(self):
+        """Loads a checkpoint for the denoiser."""
+        path = self.denoiser_checkpoint_path
+        if not path or not os.path.isfile(path):
+            if self.is_master:
+                print("No valid checkpoint path provided for denoiser. Starting from scratch.")
+            return
+        if self.is_master:
+            print(f"Loading denoiser checkpoint from: {path}")
+            checkpoint = torch.load(path, map_location=self.device)
+        else:
+            checkpoint = None
+        dist.barrier()
+        dist_list = [checkpoint] if self.is_master else [None]
+        dist.broadcast_object_list(dist_list, src=0)
+        checkpoint = dist_list[0]
+        try:
+            self.denoiser.module.load_state_dict(checkpoint['denoiser'])
+            if self.is_master: print("Denoiser weights loaded successfully.")
+        except Exception as e:
+            if self.is_master: print(f"[ERROR] Failed to load denoiser state_dict: {e}")
+        if 'step' in checkpoint and self.is_master:
+            print(f"Checkpoint from step {checkpoint['step']}.")
+        dist.barrier()
+    def _setup_ddp(self):
+        """Sets up DDP and DataLoaders."""
+        self.denoiser = self.denoiser.to(self.device)
+        self.denoiser = DDP(self.denoiser, device_ids=[self.local_rank])
+        for param in self.denoiser.parameters():
+            param.requires_grad = True
+        # Use the Dataset from the VAE script
+        self.dataset = VoxelVertexDataset_edge(
+            root_dir=self.cfg['dataset']['path'],
+            base_resolution=self.cfg['dataset']['base_resolution'],
+            min_resolution=self.cfg['dataset']['min_resolution'],
+            cache_dir=self.cfg['dataset']['cache_dir'],
+            renders_dir=self.cfg['dataset']['renders_dir'],
+            filter_active_voxels=self.cfg['dataset']['filter_active_voxels'],
+            cache_filter_path=self.cfg['dataset']['cache_filter_path'],
+            active_voxel_res=128,
+            pc_sample_number=819200,
+            sample_type=self.cfg['dataset']['sample_type'],
+        )
+        self.sampler = DistributedSampler(
+            self.dataset,
+            num_replicas=self.world_size,
+            rank=self.rank,
+            shuffle=True
+        )
+        # Use collate_fn_pointnet
+        self.dataloader = DataLoader(
+            self.dataset,
+            batch_size=self.cfg['training']['batch_size'],
+            shuffle=False,
+            collate_fn=partial(collate_fn_pointnet,),
+            num_workers=self.cfg['training']['num_workers'],
+            pin_memory=True,
+            sampler=self.sampler,
+            prefetch_factor=4,
+            persistent_workers=True,
+            drop_last=True,
+        )
+    @torch.no_grad()
+    def encode_image(self, images) -> torch.Tensor:
+        if isinstance(images, torch.Tensor):
+            batch_tensor = images.to(self.device)
+        elif isinstance(images, list):
+            assert all(isinstance(i, Image.Image) for i in images), "Image list should be list of PIL images"
+            image = [i.resize((518, 518), Image.LANCZOS) for i in images]
+            image = [np.array(i.convert('RGB')).astype(np.float32) / 255 for i in image]
+            image = [torch.from_numpy(i).permute(2, 0, 1).float() for i in image]
+            batch_tensor = torch.stack(image).to(self.device)
+        else:
+            raise ValueError(f"Unsupported type of image: {type(image)}")
+        if batch_tensor.shape[-2:] != (518, 518):
+             batch_tensor = F.interpolate(batch_tensor, (518, 518), mode='bicubic', align_corners=False)
+        features = self.condition_model(batch_tensor, is_training=True)['x_prenorm']
+        patchtokens = F.layer_norm(features, features.shape[-1:])
+        return patchtokens
+    def process_batch(self, batch):
+        preprocessed_images = batch['image']
+        cond_ = self.encode_image(preprocessed_images)
+        return cond_
+    def train(self, num_epochs=10000):
+        # Unconditional handling for text/image
+        if self.is_cond == False:
+            if self.condition_type == 'text':
+                txt = ['']
+                encoding = self.tokenizer(txt, max_length=77, padding='max_length', truncation=True, return_tensors='pt')
+                tokens = encoding['input_ids'].to(self.device)
+                with torch.no_grad():
+                    cond_ = self.condition_model(input_ids=tokens).last_hidden_state
+            else:
+                blank_img = Image.fromarray(np.zeros((self.img_res, self.img_res, 3), dtype=np.uint8))
+                with torch.no_grad():
+                    dummy_cond = self.encode_image([blank_img])
+                    cond_ = torch.zeros_like(dummy_cond)
+                if self.is_master: print(f"Generated unconditional image prompt with shape: {cond_.shape}")
+        self._load_denoiser()
+        self.denoiser.train()
+        # Step tracking
+        step = 0
+        if self.denoiser_checkpoint_path:
+            match = re.search(r'step(\d+)', self.denoiser_checkpoint_path)
+            if match:
+                step = int(match.group(1))
+        step = 0
+        for epoch in range(num_epochs):
+            self.sampler.set_epoch(epoch)
+            epoch_losses = []
+            for i, batch in enumerate(self.dataloader):
+                self.optimizer.zero_grad()
+                # --- Conditioning ---
+                if self.is_cond and self.condition_type == 'image':
+                    cond_ = self.process_batch(batch)
+                # Retrieve Data from collate_fn_pointnet
+                point_cloud = batch['point_cloud_128'].to(self.device)
+                active_coords = batch['active_voxels_128'].to(self.device)
+                # Handle Batch Size for Conditioning
+                batch_size = int(active_coords[:, 0].max().item() + 1)
+                if cond_.shape[0] != batch_size:
+                    cond_ = cond_.expand(batch_size, -1, -1).contiguous().to(self.device)
+                else:
+                    cond_ = cond_.to(self.device)
+                with autocast(device_type='cuda', dtype=torch.bfloat16):
+                    with torch.no_grad():
+                        # 1. Encode Point Cloud to Features on Active Voxels
+                        # The encoder processes point clouds and scatters features to `active_coords`
+                        active_voxel_feats = self.voxel_encoder(
+                            p=point_cloud,
+                            sparse_coords=active_coords,
+                            res=128,
+                            bbox_size=(-0.5, 0.5),
+                        )
+                        # 2. Prepare Sparse Input for VAE
+                        sparse_input = SparseTensor(
+                            feats=active_voxel_feats,
+                            coords=active_coords.int()
+                        )
+                        # 3. Get Latent Distribution from VAE
+                        # We use the encode method of VoxelVAE to get posterior
+                        latent_128, posterior = self.vae.encode(sparse_input)
+                    # 5. Calculate Diffusion Loss
+                    terms, _ = self.training_losses(x_0=latent_128, cond=cond_)
+                    loss = terms['loss']
+                self.scaler.scale(loss).backward()
+                self.scaler.step(self.optimizer)
+                self.scaler.update()
+                with torch.no_grad():
+                    avg_loss = loss.detach()
+                    dist.all_reduce(avg_loss, op=dist.ReduceOp.AVG)
+                step += 1
+                # --- Logging and Saving ---
+                if self.is_master:
+                    epoch_losses.append(avg_loss.item())
+                    if step % 10 == 0:
+                        print(f"Epoch {epoch+1} Step {step}: "
+                            f"Rank0_Loss = {loss.item():.4f}, "
+                            f"Global_Avg_Loss = {avg_loss.item():.4f}, "
+                            f"Epoch_Mean = {np.mean(epoch_losses):.4f}")
+                    if step % self.i_save == 0 or step == 1:
+                        checkpoint = {
+                            'denoiser': self.denoiser.module.state_dict(),
+                            'step': step
+                        }
+                        loss_val_str = f"{loss.item():.6f}".replace('.', '_')
+                        save_path = os.path.join(self.save_dir, f"checkpoint_step{step}_loss{loss_val_str}.pt")
+                        torch.save(checkpoint, save_path)
+                        print(f"Saved checkpoint to {save_path}")
+            if self.is_master:
+                avg_loss = np.mean(epoch_losses) if epoch_losses else 0
+                log_path = os.path.join(self.save_dir, "loss_log.txt")
+                with open(log_path, "a") as f:
+                    f.write(f"Epoch {epoch+1}, Step {step}, AvgLoss {avg_loss:.6f}\n")
+            dist.barrier()
+            # torch.cuda.empty_cache()
+            # gc.collect()
+        if self.is_master:
+            print("Training complete.")
+def main():
+    if mp.get_start_method(allow_none=True) != 'spawn':
+        mp.set_start_method('spawn', force=True)
+    rank, local_rank, world_size = setup_distributed()
+    torch.manual_seed(42)
+    np.random.seed(42)
+    # Path to your config
+    config_path = "/root/Trisf/config_slat_flow_128to1024_pointnet_head.yaml"
+    with open(config_path) as f:
+        cfg = yaml.safe_load(f)
+    # Initialize Flow Model (on CPU first)
+    diffusion_model = SLatFlowModel(
+        resolution=cfg['flow']['resolution'],
+        in_channels=cfg['flow']['in_channels'],
+        out_channels=cfg['flow']['out_channels'],
+        model_channels=cfg['flow']['model_channels'],
+        cond_channels=cfg['flow']['cond_channels'],
+        num_blocks=cfg['flow']['num_blocks'],
+        num_heads=cfg['flow']['num_heads'],
+        mlp_ratio=cfg['flow']['mlp_ratio'],
+        patch_size=cfg['flow']['patch_size'],
+        num_io_res_blocks=cfg['flow']['num_io_res_blocks'],
+        io_block_channels=cfg['flow']['io_block_channels'],
+        pe_mode=cfg['flow']['pe_mode'],
+        qk_rms_norm=cfg['flow']['qk_rms_norm'],
+        qk_rms_norm_cross=cfg['flow']['qk_rms_norm_cross'],
+        use_fp16=cfg['flow'].get('use_fp16', False),
+    )
+    torch.manual_seed(42 + rank)
+    np.random.seed(42 + rank)
+    trainer = SLatFlowMatchingTrainer(
+        denoiser=diffusion_model,
+        t_schedule=cfg['t_schedule'],
+        sigma_min=cfg['sigma_min'],
+        cfg=cfg,
+        rank=rank,
+        local_rank=local_rank,
+        world_size=world_size,
+    )
+    trainer.train()
+    cleanup_distributed()
+# Script entry point: start training only when this file is executed directly.
+if __name__ == '__main__':
+    main()

train_slat_vae_512_128to1024_pointnet.py ADDED Viewed

	@@ -0,0 +1,682 @@

+import torch.nn as nn
+import os
+# os.environ['ATTN_BACKEND'] = 'xformers'
+import yaml
+import torch
+import time
+from datetime import datetime
+from torch.utils.data import DataLoader
+from functools import partial
+from triposf.modules.sparse.basic import SparseTensor
+import torch.nn.functional as F
+from torch.optim import AdamW
+from torch.cuda.amp import GradScaler, autocast
+from triposf.models.triposf_vae.VoxelFeatureVAE_edge_woself_128to1024_decoder import VoxelVAE
+from vertex_encoder import VoxelFeatureEncoder_active_pointnet
+from dataset_triposf import VoxelVertexDataset_edge, collate_fn_pointnet
+from utils import load_pretrained_woself, AdaptiveFocalLoss, fast_isin, AsymmetricFocalLoss, DiceLoss
+import torch.distributed as dist
+from torch.nn.parallel import DistributedDataParallel as DDP
+from torch.utils.data.distributed import DistributedSampler
+from transformers import get_cosine_schedule_with_warmup
+import math
+def flatten_coords_4d(coords_4d: torch.Tensor):
+    coords_4d_long = coords_4d.long()
+    base_x = 1024
+    base_y = 1024 * 1024
+    base_z = 1024 * 1024 * 1024
+    flat_coords = coords_4d_long[:, 0] * base_z + \
+                  coords_4d_long[:, 1] * base_y + \
+                  coords_4d_long[:, 2] * base_x + \
+                  coords_4d_long[:, 3]
+    return flat_coords
+def downsample_voxels(
+    voxels: torch.Tensor,
+    input_resolution: int,
+    output_resolution: int
+) -> torch.Tensor:
+    if input_resolution % output_resolution != 0:
+        raise ValueError(f"input_resolution ({input_resolution}) must be divisible "
+                         f"by output_resolution ({output_resolution}).")
+    factor = input_resolution // output_resolution
+    downsampled_voxels = voxels.clone().to(torch.long)
+    downsampled_voxels[:, 1:] //= factor
+    unique_downsampled_voxels = torch.unique(downsampled_voxels, dim=0)
+    return unique_downsampled_voxels
+class Trainer:
+    def __init__(self, config_path, rank, world_size, local_rank):
+        """Set up the trainer for one distributed rank.
+
+        Args:
+            config_path: Path to the YAML configuration file.
+            rank: Global rank of this process; rank 0 is treated as the master.
+            world_size: Total number of processes.
+            local_rank: Index of the CUDA device used by this process.
+
+        The helper calls below are order-dependent: ``load_config`` provides
+        ``self.cfg`` and ``self.save_dir``, which the later steps read.
+        ``load_config`` and ``save_config_with_hash`` call ``dist.barrier()``, so the
+        process group has to be initialised before constructing the trainer.
+        """
+        self.rank = rank
+        self.world_size = world_size
+        self.local_rank = local_rank
+        self.is_master = self.rank == 0
+        self.load_config(config_path)
+        # Derived from the per-rank batch size: 8 // batch_size, but at least 1.
+        self.accum_steps = max(1, 8 // self.cfg['training']['batch_size'])
+        self.config_hash = self.save_config_with_hash()
+        self.init_device()
+        self.init_dirs()
+        self.init_components()
+        self.init_training()
+        # Loss bookkeeping, filled in during training/evaluation.
+        self.train_loss_history = []
+        self.eval_loss_history = []
+        self.best_eval_loss = float('inf')
+    def save_config_with_hash(self):
+        import hashlib
+        # Serialize config to hash
+        config_str = yaml.dump(self.cfg)
+        config_hash = hashlib.md5(config_str.encode()).hexdigest()[:8]
+        # Prepare all flags as string for formatting
+        add_block_embed_flag = "True" if self.cfg['model']['add_block_embed'] else "False"
+        using_attn_flag = "True" if self.cfg['model']['using_attn'] else "False"
+        dataset_name = os.path.basename(self.cfg['dataset']['path'])
+        # Format save_dir with all placeholders
+        self.cfg['experiment']['save_dir'] = self.cfg['experiment']['save_dir'].format(
+            dataset_name=dataset_name,
+            config_hash=config_hash,
+            n_train_samples=self.cfg['dataset']['n_train_samples'],
+            multires=self.cfg['model']['multires'],
+            add_block_embed=add_block_embed_flag,
+            using_attn=using_attn_flag,
+            batch_size=self.cfg['training']['batch_size'],
+        )
+        if self.is_master:
+            os.makedirs(self.save_dir, exist_ok=True)
+            config_path = os.path.join(self.save_dir, "config.yaml")
+            with open(config_path, 'w') as f:
+                yaml.dump(self.cfg, f)
+        dist.barrier()
+        return config_hash
+    def save_checkpoint(self, epoch, avg_loss, batch_idx):
+        if not self.is_master:
+            return
+        checkpoint_path = os.path.join(self.save_dir, f"checkpoint_epoch{epoch}_batch{batch_idx}_loss{avg_loss:.4f}.pt")
+        config_path = os.path.join(self.save_dir, "config.yaml")
+        torch.save({
+            'voxel_encoder': self.voxel_encoder.module.state_dict(),
+            'vae': self.vae.module.state_dict(),
+            'epoch': epoch,
+            'loss': avg_loss,
+            'config': self.cfg
+        }, checkpoint_path)
+        def quoted_presenter(dumper, data):
+            return dumper.represent_scalar('tag:yaml.org,2002:str', data, style='"')
+        yaml.add_representer(str, quoted_presenter)
+        with open(config_path, 'w') as f:
+            yaml.dump(self.cfg, f)
+    def load_config(self, config_path):
+        with open(config_path) as f:
+            self.cfg = yaml.safe_load(f)
+        # Extract and convert flags for formatting
+        add_block_embed_flag = "True" if self.cfg['model']['add_block_embed'] else "False"
+        using_attn_flag = "True" if self.cfg['model']['using_attn'] else "False"
+        dataset_name = os.path.basename(self.cfg['dataset']['path'])
+        self.save_dir = self.cfg['experiment']['save_dir'].format(
+            dataset_name=dataset_name,
+            n_train_samples=self.cfg['dataset']['n_train_samples'],
+            multires=self.cfg['model']['multires'],
+            add_block_embed=add_block_embed_flag,
+            using_attn=using_attn_flag,
+            batch_size=self.cfg['training']['batch_size'],
+        )
+        if self.is_master:
+            os.makedirs(self.save_dir, exist_ok=True)
+        dist.barrier()
+    def init_device(self):
+        self.device = torch.device(f"cuda:{self.local_rank}")
+    def init_dirs(self):
+        self.log_file = os.path.join(self.save_dir, f"training_log_{self.cfg['training']['lr']}.txt")
+        if self.is_master:
+            with open(self.log_file, "a") as f:
+                current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+                f.write(f"[{current_time}] Config loaded for distributed training with world size {self.world_size}\n")
+    def init_components(self):
+        # self.dataset = VoxelVertexDataset_edge_shapenet(
+        #     root_dir=self.cfg['dataset']['path'],
+        #     file_list_path=self.cfg['dataset']['file_list_path'],
+        #     map_file_path=self.cfg['dataset']['map_file_path'],
+        #     base_resolution=self.cfg['dataset']['base_resolution'],
+        #     min_resolution=self.cfg['dataset']['min_resolution'],
+        #     cache_dir=self.cfg['dataset']['cache_dir'],
+        #     renders_dir=self.cfg['dataset']['renders_dir'],
+        #     filter_active_voxels=self.cfg['dataset']['filter_active_voxels'],
+        #     cache_filter_path=self.cfg['dataset']['cache_filter_path'],
+        # )
+        self.dataset = VoxelVertexDataset_edge(
+            root_dir=self.cfg['dataset']['path'],
+            base_resolution=self.cfg['dataset']['base_resolution'],
+            min_resolution=self.cfg['dataset']['min_resolution'],
+            cache_dir=self.cfg['dataset']['cache_dir'],
+            renders_dir=self.cfg['dataset']['renders_dir'],
+            filter_active_voxels=self.cfg['dataset']['filter_active_voxels'],
+            cache_filter_path=self.cfg['dataset']['cache_filter_path'],
+            active_voxel_res=128,
+            pc_sample_number=819200,
+            sample_type=self.cfg['dataset']['sample_type'],
+        )
+        self.sampler = DistributedSampler(
+            self.dataset,
+            num_replicas=self.world_size,
+            rank=self.rank,
+            shuffle=True,
+        )
+        self.dataloader = DataLoader(
+            self.dataset,
+            batch_size=self.cfg['training']['batch_size'],
+            shuffle=False,
+            collate_fn=partial(collate_fn_pointnet,),
+            num_workers=self.cfg['training']['num_workers'],
+            pin_memory=True,
+            sampler=self.sampler,
+            # prefetch_factor=4,
+            persistent_workers=True,
+        )
+        self.voxel_encoder = VoxelFeatureEncoder_active_pointnet(
+            in_channels=15,
+            hidden_dim=256,
+            out_channels=1024,
+            scatter_type='mean',
+            n_blocks=5,
+            resolution=128,
+            add_label=False,
+        ).to(self.device)
+        # ablation 3: voxelvae_1volume, have tested
+        self.vae = VoxelVAE(
+            in_channels=self.cfg['model']['in_channels'],
+            latent_dim=self.cfg['model']['latent_dim'],
+            encoder_blocks=self.cfg['model']['encoder_blocks'],
+            decoder_blocks_vtx=self.cfg['model']['decoder_blocks_vtx'],
+            decoder_blocks_edge=self.cfg['model']['decoder_blocks_edge'],
+            num_heads=8,
+            num_head_channels=64,
+            mlp_ratio=4.0,
+            attn_mode="swin",
+            window_size=8,
+            pe_mode="ape",
+            use_fp16=False,
+            use_checkpoint=True,
+            qk_rms_norm=False,
+            using_subdivide=True,
+            using_attn=self.cfg['model']['using_attn'],
+            attn_first=self.cfg['model'].get('attn_first', True),
+            pred_direction=self.cfg['model'].get('pred_direction', False),
+        ).to(self.device)
+        if self.cfg['training']['from_pretrained']:
+            load_pretrained_woself(
+                checkpoint_path=self.cfg['training']['checkpoint_path'],
+                voxel_encoder=self.voxel_encoder,
+                vae=self.vae,
+                optimizer=None,
+            )
+        self.voxel_encoder = DDP(self.voxel_encoder, device_ids=[self.local_rank], find_unused_parameters=False)
+        self.vae = DDP(self.vae, device_ids=[self.local_rank], find_unused_parameters=False)
+    def init_training(self):
+        self.optimizer = AdamW(
+            list(self.vae.module.parameters()) +
+            list(self.voxel_encoder.module.parameters()),
+            lr=self.cfg['training']['lr'],
+            weight_decay=0.01,
+        )
+        num_update_steps_per_epoch = math.ceil(len(self.dataloader) / self.accum_steps)
+        # print('num_update_steps_per_epoch', num_update_steps_per_epoch) # 1305
+        max_epochs = self.cfg['training']['max_epochs']
+        num_training_steps = max_epochs * num_update_steps_per_epoch
+        num_warmup_steps = 1000
+        # self.scheduler = torch.optim.lr_scheduler.LambdaLR(
+        #     self.optimizer,
+        #     lr_lambda=lambda epoch: self.cfg['training']['gamma'] ** (epoch // self.cfg['training']['step_size'])
+        # )
+        self.scheduler = get_cosine_schedule_with_warmup(
+            self.optimizer,
+            num_warmup_steps=num_warmup_steps,
+            num_training_steps=num_training_steps
+        )
+        self.focal_loss = AdaptiveFocalLoss(gamma=2.0, max_alpha=10.0).to(self.device)
+        self.mse_loss = nn.MSELoss(reduction='mean').to(self.device)
+        self.asyloss = AsymmetricFocalLoss(
+            gamma_pos=0.0,
+            gamma_neg=4.0,
+            clip=0.05,
+        )
+        self.bce_loss = torch.nn.BCEWithLogitsLoss()
+        self.dice_loss = DiceLoss()
+        self.scaler = GradScaler()
+    def train_step(self, batch):
+        """Modified training step that handles vertex and edge voxels separately after initial prediction."""
+        # 1. Retrieve data from batch
+        combined_voxels_1024 = batch['combined_voxels_1024'].to(self.device)
+        combined_voxel_labels_1024 = batch['combined_voxel_labels_1024'].to(self.device)
+        gt_vertex_voxels_1024 = batch['gt_vertex_voxels_1024'].to(self.device)
+        gt_edge_voxels_1024_ = batch['gt_edge_voxels_1024'].to(self.device)
+        gt_combined_endpoints_1024 = batch['gt_combined_endpoints_1024'].to(self.device)
+        gt_combined_errors_1024 = batch['gt_combined_errors_1024'].to(self.device)
+        edge_mask = (combined_voxel_labels_1024 == 1)
+        gt_edge_endpoints_1024 = gt_combined_endpoints_1024[edge_mask]
+        gt_edge_errors_1024 = gt_combined_errors_1024[edge_mask]
+        gt_edge_voxels_1024 = combined_voxels_1024[edge_mask].to(self.device)
+        # print('gt_edge_voxels_1024_-gt_edge_voxels_1024.sum()', (gt_edge_voxels_1024_-gt_edge_voxels_1024).sum())
+        p1 = gt_edge_endpoints_1024[:, 1:4].float()
+        p2 = gt_edge_endpoints_1024[:, 4:7].float()
+        mask = ( (p1[:,0] < p2[:,0]) |
+                ((p1[:,0] == p2[:,0]) & (p1[:,1] < p2[:,1])) |
+                ((p1[:,0] == p2[:,0]) & (p1[:,1] == p2[:,1]) & (p1[:,2] <= p2[:,2])) )
+        pA = torch.where(mask[:, None], p1, p2)  # smaller one
+        pB = torch.where(mask[:, None], p2, p1)  # larger one
+        d = pB - pA
+        dir_gt = F.normalize(d, dim=-1, eps=1e-6)
+        vtx_128 = downsample_voxels(gt_vertex_voxels_1024, input_resolution=1024, output_resolution=128)
+        vtx_256 = downsample_voxels(gt_vertex_voxels_1024, input_resolution=1024, output_resolution=256)
+        vtx_512 = downsample_voxels(gt_vertex_voxels_1024, input_resolution=1024, output_resolution=512)
+        vtx_1024 = gt_vertex_voxels_1024
+        edge_128 = downsample_voxels(combined_voxels_1024, input_resolution=1024, output_resolution=128)
+        edge_256 = downsample_voxels(combined_voxels_1024, input_resolution=1024, output_resolution=256)
+        edge_512 = downsample_voxels(combined_voxels_1024, input_resolution=1024, output_resolution=512)
+        edge_1024 = combined_voxels_1024
+        active_coords = batch['active_voxels_128'].to(self.device)
+        point_cloud = batch['point_cloud_128'].to(self.device)
+        active_voxel_feats = self.voxel_encoder(
+            p=point_cloud,
+            sparse_coords=active_coords,
+            res=128,
+            bbox_size=(-0.5, 0.5),
+            # voxel_label=active_labels,
+        )
+        sparse_input = SparseTensor(
+            feats=active_voxel_feats,
+            coords=active_coords.int()
+        )
+        gt_edge_voxels_list = [
+            edge_128,
+            edge_256,
+            edge_512,
+            edge_1024,
+        ]
+        gt_vertex_voxels_list = [
+            vtx_128,
+            vtx_256,
+            vtx_512,
+            vtx_1024,
+        ]
+        results, posterior, latent_128 = self.vae(
+            sparse_input,
+            gt_vertex_voxels_list=gt_vertex_voxels_list,
+            gt_edge_voxels_list=gt_edge_voxels_list,
+            training=True,
+            sample_ratio=0.,
+        )
+        # print("results[-1]['edge']['coords_4d'][1827:1830]", results[-1]['edge']['coords_4d'][1827:1830])
+        total_loss = 0.
+        prune_loss_total = 0.
+        vertex_loss_total = 0.
+        edge_loss_total=0.
+        with autocast(dtype=torch.bfloat16):
+            initial_result = results[0]
+            vertex_mask = initial_result['vertex_mask']
+            vtx_logits = initial_result['vtx_feats']
+            vertex_loss = self.asyloss(vtx_logits.squeeze(-1), vertex_mask.float())
+            edge_mask = initial_result['edge_mask']
+            edge_logits = initial_result['edge_feats']
+            edge_loss = self.asyloss(edge_logits.squeeze(-1), edge_mask.float())
+            vertex_loss_total += vertex_loss
+            edge_loss_total += edge_loss
+            total_loss += vertex_loss
+            total_loss += edge_loss
+            # Process each level's results
+            for idx, res_dict in enumerate(results[1:], start=1):
+                # Vertex branch losses
+                vertex_pred_coords = res_dict['vertex']['occ_coords']
+                vertex_occ_probs = res_dict['vertex']['occ_probs']
+                vertex_gt_coords = res_dict['vertex']['coords']
+                vertex_labels = fast_isin(vertex_pred_coords, vertex_gt_coords, resolution=1024).float()
+                # print('vertex_labels.sum()', vertex_labels.sum(), idx)
+                vertex_logits = vertex_occ_probs.squeeze()
+                # if vertex_labels.sum() > 0 and vertex_labels.sum() < len(vertex_labels):
+                vertex_prune_loss = self.focal_loss(vertex_logits, vertex_labels)
+                # vertex_prune_loss = self.dice_loss(vertex_logits, vertex_labels)
+                # dilation 1: bce loss
+                # vertex_prune_loss = self.bce_loss(vertex_logits, vertex_labels,)
+                prune_loss_total += vertex_prune_loss
+                total_loss += vertex_prune_loss
+                # Edge branch losses
+                edge_pred_coords = res_dict['edge']['occ_coords']
+                edge_occ_probs = res_dict['edge']['occ_probs']
+                edge_gt_coords = res_dict['edge']['coords']
+                edge_labels = fast_isin(edge_pred_coords, edge_gt_coords, resolution=1024).float()
+                # print('edge_labels.sum()', edge_labels.sum(), idx)
+                edge_logits = edge_occ_probs.squeeze()
+                # if edge_labels.sum() > 0 and edge_labels.sum() < len(edge_labels):
+                edge_prune_loss = self.focal_loss(edge_logits, edge_labels)
+                # dilation 1: bce loss
+                # edge_prune_loss = self.bce_loss(edge_logits, edge_labels,)
+                prune_loss_total += edge_prune_loss
+                total_loss += edge_prune_loss
+                if idx == 3:
+                    pred_coords = res_dict['edge']['coords_4d']              # [N,4] (b,x,y,z)
+                    pred_feats  = res_dict['edge']['predicted_offset_feats'] # [N,C]
+                    gt_coords = gt_edge_voxels_1024.to(pred_coords.device)    # [M,4]
+                    gt_feats  = gt_edge_errors_1024[:, 1:].to(pred_coords.device)  # [M,C]
+                    pred_keys = flatten_coords_4d(pred_coords)
+                    gt_keys   = flatten_coords_4d(gt_coords)
+                    sorted_pred_keys, pred_order = torch.sort(pred_keys)
+                    pred_coords_sorted = pred_coords[pred_order]
+                    pred_feats_sorted  = pred_feats[pred_order]
+                    sorted_gt_keys, gt_order = torch.sort(gt_keys)
+                    gt_coords_sorted = gt_coords[gt_order]
+                    gt_feats_sorted  = gt_feats[gt_order]
+                    # pos = torch.searchsorted(sorted_gt_keys, sorted_pred_keys)
+                    # valid_pos = pos < len(sorted_gt_keys)
+                    # matched = torch.zeros_like(valid_pos, dtype=torch.bool)
+                    # matched[valid_pos] = (sorted_gt_keys[pos[valid_pos]] == sorted_pred_keys[valid_pos])
+                    # valid_mask = valid_pos & matched
+                    pos = torch.searchsorted(sorted_gt_keys, sorted_pred_keys)
+                    valid_mask = (pos < len(sorted_gt_keys)) & (sorted_gt_keys[pos] == sorted_pred_keys)
+                    if valid_mask.any():
+                        matched_pred_feats = pred_feats_sorted[valid_mask]
+                        matched_gt_feats   = gt_feats_sorted[pos[valid_mask]]
+                        mse_loss_feats = self.mse_loss(matched_pred_feats, matched_gt_feats * 2)
+                        total_loss += mse_loss_feats
+                        if self.cfg['model'].get('pred_direction', False):
+                            pred_dirs = res_dict['edge']['predicted_direction_feats']
+                            dir_gt_device = dir_gt.to(pred_coords.device)
+                            pred_dirs_sorted = pred_dirs[pred_order]
+                            dir_gt_sorted = dir_gt_device[gt_order]
+                            matched_pred_dirs = pred_dirs_sorted[valid_mask]
+                            matched_gt_dirs   = dir_gt_sorted[pos[valid_mask]]
+                            mse_loss_dirs = self.mse_loss(matched_pred_dirs, matched_gt_dirs)
+                            total_loss += mse_loss_dirs
+                    else:
+                        mse_loss_feats = torch.tensor(0., device=pred_coords.device)
+                    if self.cfg['model'].get('pred_direction', False):
+                        mse_loss_dirs = torch.tensor(0., device=pred_coords.device)
+            # KL loss
+            kl_loss = posterior.kl(dims=(1,)).mean() * 1e-3 # 1e-3 before
+            total_loss += kl_loss
+        # Backpropagation
+        scaled_total_loss = total_loss / self.accum_steps
+        self.scaler.scale(scaled_total_loss).backward()
+        return {
+            'total_loss': total_loss.item(),
+            'kl_loss': kl_loss.item(),
+            'prune_loss': prune_loss_total.item(),
+            'vertex_loss': vertex_loss_total.item(),
+            'edge_loss': edge_loss_total.item(),
+            'offset_loss': mse_loss_feats.item(),
+            'direction_loss': mse_loss_dirs.item(),
+        }
+    def train(self):
+        accum_steps = self.accum_steps
+        for epoch in range(self.cfg['training']['start_epoch'], self.cfg['training']['max_epochs']):
+            self.dataloader.sampler.set_epoch(epoch)
+            # Initialize metrics
+            metrics = {
+                'total_loss': 0.0,
+                'kl_loss': 0.0,
+                'prune_loss': 0.0,
+                'vertex_loss': 0.0,
+                'edge_loss': 0.0,
+                'offset_loss': 0.0,
+                'direction_loss': 0.0,
+            }
+            num_batches = 0
+            self.optimizer.zero_grad(set_to_none=True)
+            for i, batch in enumerate(self.dataloader):
+                # Get all losses from train_step
+                if batch is None:
+                    continue
+                step_losses = self.train_step(batch)
+                # Accumulate losses
+                for key in metrics:
+                    metrics[key] += step_losses[key]
+                num_batches += 1
+                if (i + 1) % accum_steps == 0:
+                    self.scaler.unscale_(self.optimizer)
+                    torch.nn.utils.clip_grad_norm_(self.vae.parameters(), max_norm=1.0)
+                    torch.nn.utils.clip_grad_norm_(self.voxel_encoder.parameters(), max_norm=1.0)
+                    self.scaler.step(self.optimizer)
+                    self.scaler.update()
+                    self.optimizer.zero_grad(set_to_none=True)
+                    self.scheduler.step()
+                # Print batch-level metrics
+                if self.is_master:
+                    avg_metric = {key: value / num_batches for key, value in metrics.items()}
+                    print(
+                        f"[Epoch {epoch}] Batch:{num_batches} "
+                        f"AvgL:{avg_metric['total_loss']:.4f} "
+                        f"Loss: {step_losses['total_loss']:.4f}, "
+                        f"KLL: {step_losses['kl_loss']:.4f}, "
+                        f"PruneL: {step_losses['prune_loss']:.4f}, "
+                        f"VertexL: {step_losses['vertex_loss']:.4f}, "
+                        f"EdgeL: {step_losses['edge_loss']:.4f}, "
+                        f"OffsetL: {step_losses['offset_loss']:.4f}, "
+                        f"DireL: {step_losses['direction_loss']:.4f}, "
+                        f"LR: {self.optimizer.param_groups[0]['lr']:.4e} "
+                    )
+                    if i % 2000 == 0 and i != 0:
+                        self.save_checkpoint(epoch, avg_metric['total_loss'], i)
+                        with open(self.log_file, "a") as f:
+                            current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+                            log_line = (
+                                f"Epoch {epoch:05d} | "
+                                f"Batch {i:05d} | "
+                                f"Loss: {avg_metric['total_loss']:.6f} "
+                                f"Avg KLL: {avg_metric['kl_loss']:.4f} "
+                                f"Avg PruneL: {avg_metric['prune_loss']:.4f} "
+                                f"Avg VertexL: {avg_metric['vertex_loss']:.4f} "
+                                f"Avg EdgeL: {avg_metric['edge_loss']:.4f} "
+                                f"Avg OffsetL: {avg_metric['offset_loss']:.4f} "
+                                f"Avg DireL: {avg_metric['direction_loss']:.4f} "
+                                f"LR: {self.optimizer.param_groups[0]['lr']:.4e} "
+                                f"[{current_time}]\n"
+                            )
+                            f.write(log_line)
+            if num_batches % accum_steps != 0:
+                self.scaler.unscale_(self.optimizer)
+                torch.nn.utils.clip_grad_norm_(self.vae.parameters(), max_norm=1.0)
+                torch.nn.utils.clip_grad_norm_(self.voxel_encoder.parameters(), max_norm=1.0)
+                self.scaler.step(self.optimizer)
+                self.scaler.update()
+                self.optimizer.zero_grad(set_to_none=True)
+                self.scheduler.step()
+            # Calculate epoch averages
+            avg_metrics = {key: value / num_batches for key, value in metrics.items()}
+            self.train_loss_history.append(avg_metrics['total_loss'])
+            # Log to file
+            if self.is_master:
+                with open(self.log_file, "a") as f:
+                    current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+                    log_line = (
+                        f"Epoch {epoch:05d} | "
+                        f"Loss: {avg_metrics['total_loss']:.6f} "
+                        f"Avg KLL: {avg_metrics['kl_loss']:.4f} "
+                        f"Avg PruneL: {avg_metrics['prune_loss']:.4f} "
+                        f"Avg VertexL: {avg_metrics['vertex_loss']:.4f} "
+                        f"Avg EdgeL: {avg_metrics['edge_loss']:.4f} "
+                        f"Avg OffsetL: {avg_metrics['offset_loss']:.4f} "
+                        f"Avg DireL: {avg_metrics['direction_loss']:.4f} "
+                        f"LR: {self.optimizer.param_groups[0]['lr']:.4e} "
+                        f"[{current_time}]\n"
+                    )
+                    f.write(log_line)
+                # Print epoch summary
+                print(
+                    f"[Epoch {epoch}] "
+                    f"Avg Loss: {avg_metrics['total_loss']:.4f} "
+                    f"Avg KLL: {avg_metrics['kl_loss']:.4f} "
+                    f"Avg PruneL: {avg_metrics['prune_loss']:.4f} "
+                    f"Avg VertexL: {avg_metrics['vertex_loss']:.4f} "
+                    f"Avg EdgeL: {avg_metrics['edge_loss']:.4f} "
+                    f"Avg OffsetL: {avg_metrics['offset_loss']:.4f} "
+                    f"Avg DireL: {avg_metrics['direction_loss']:.4f} "
+                    f"[{current_time}]\n"
+                )
+                # Save checkpoint
+                if epoch % self.cfg['training']['save_every'] == 0:
+                    self.save_checkpoint(epoch, avg_metrics['total_loss'], i)
+            # Update learning rate
+            if self.is_master:
+                current_lr = self.optimizer.param_groups[0]['lr']
+                print(f"Epoch {epoch}: Learning rate updated to {current_lr:.2e}")
+            dist.barrier()
+def main():
+    # Initialize the process group
+    dist.init_process_group(backend='nccl')
+    # Get rank and world size from environment variables set by the launcher
+    rank = int(os.environ['RANK'])
+    world_size = int(os.environ['WORLD_SIZE'])
+    local_rank = int(os.environ['LOCAL_RANK'])
+    # Set the device for the current process. This is crucial.
+    torch.cuda.set_device(local_rank)
+    torch.manual_seed(42+rank)
+    with torch.cuda.amp.autocast(dtype=torch.bfloat16):
+        # Pass the distributed info to the Trainer
+        trainer = Trainer(
+            config_path="/gemini/user/private/zhaotianhao/Triposf/config_edge_1024_error_8enc_8dec_woself_finetune_128to1024_addhead.yaml",
+            rank=rank,
+            world_size=world_size,
+            local_rank=local_rank
+        )
+        trainer.train()
+    # Clean up the process group
+    dist.destroy_process_group()
+if __name__ == '__main__':
+    main()

train_slat_vae_512_128to1024_pointnet_addhead.py ADDED Viewed

	@@ -0,0 +1,788 @@

+import torch.nn as nn
+import os
+# os.environ['ATTN_BACKEND'] = 'xformers'
+import yaml
+import torch
+import time
+from datetime import datetime
+from torch.utils.data import DataLoader
+from functools import partial
+from triposf.modules.sparse.basic import SparseTensor
+import torch.nn.functional as F
+from torch.optim import AdamW
+from torch.cuda.amp import GradScaler, autocast
+from triposf.models.triposf_vae.VoxelFeatureVAE_edge_woself_128to1024_decoder_addhead import VoxelVAE
+from vertex_encoder import VoxelFeatureEncoder_active_pointnet, ConnectionHead
+from dataset_triposf_head import VoxelVertexDataset_edge, collate_fn_pointnet
+from utils import load_pretrained_woself, AdaptiveFocalLoss, fast_isin, AsymmetricFocalLoss, DiceLoss, FocalLoss
+import torch.distributed as dist
+from torch.nn.parallel import DistributedDataParallel as DDP
+from torch.utils.data.distributed import DistributedSampler
+from transformers import get_cosine_schedule_with_warmup
+import math
+from torchvision.ops import sigmoid_focal_loss
+import numpy as np
+import open3d as o3d
+def flatten_coords_4d(coords_4d: torch.Tensor):
+    coords_4d_long = coords_4d.long()
+    base_x = 1024
+    base_y = 1024 * 1024
+    base_z = 1024 * 1024 * 1024
+    flat_coords = coords_4d_long[:, 0] * base_z + \
+                  coords_4d_long[:, 1] * base_y + \
+                  coords_4d_long[:, 2] * base_x + \
+                  coords_4d_long[:, 3]
+    return flat_coords
+def downsample_voxels(
+    voxels: torch.Tensor,
+    input_resolution: int,
+    output_resolution: int
+) -> torch.Tensor:
+    if input_resolution % output_resolution != 0:
+        raise ValueError(f"input_resolution ({input_resolution}) must be divisible "
+                         f"by output_resolution ({output_resolution}).")
+    factor = input_resolution // output_resolution
+    downsampled_voxels = voxels.clone().to(torch.long)
+    downsampled_voxels[:, 1:] //= factor
+    unique_downsampled_voxels = torch.unique(downsampled_voxels, dim=0)
+    return unique_downsampled_voxels
+class Trainer:
+    def __init__(self, config_path, rank, world_size, local_rank):
+        self.rank = rank
+        self.world_size = world_size
+        self.local_rank = local_rank
+        self.is_master = self.rank == 0
+        self.load_config(config_path)
+        self.accum_steps = max(1, 8 // self.cfg['training']['batch_size'])
+        self.config_hash = self.save_config_with_hash()
+        self.init_device()
+        self.init_dirs()
+        self.init_components()
+        self.init_training()
+        self.train_loss_history = []
+        self.eval_loss_history = []
+        self.best_eval_loss = float('inf')
+    def save_config_with_hash(self):
+        import hashlib
+        # Serialize config to hash
+        config_str = yaml.dump(self.cfg)
+        config_hash = hashlib.md5(config_str.encode()).hexdigest()[:8]
+        # Prepare all flags as string for formatting
+        add_block_embed_flag = "True" if self.cfg['model']['add_block_embed'] else "False"
+        using_attn_flag = "True" if self.cfg['model']['using_attn'] else "False"
+        dataset_name = os.path.basename(self.cfg['dataset']['path'])
+        # Format save_dir with all placeholders
+        self.cfg['experiment']['save_dir'] = self.cfg['experiment']['save_dir'].format(
+            dataset_name=dataset_name,
+            config_hash=config_hash,
+            n_train_samples=self.cfg['dataset']['n_train_samples'],
+            multires=self.cfg['model']['multires'],
+            add_block_embed=add_block_embed_flag,
+            using_attn=using_attn_flag,
+            batch_size=self.cfg['training']['batch_size'],
+        )
+        if self.is_master:
+            os.makedirs(self.save_dir, exist_ok=True)
+            config_path = os.path.join(self.save_dir, "config.yaml")
+            with open(config_path, 'w') as f:
+                yaml.dump(self.cfg, f)
+        dist.barrier()
+        return config_hash
+    def save_checkpoint(self, epoch, avg_loss, batch_idx):
+        if not self.is_master:
+            return
+        checkpoint_path = os.path.join(self.save_dir, f"checkpoint_epoch{epoch}_batch{batch_idx}_loss{avg_loss:.4f}.pt")
+        config_path = os.path.join(self.save_dir, "config.yaml")
+        torch.save({
+            'voxel_encoder': self.voxel_encoder.module.state_dict(),
+            'vae': self.vae.module.state_dict(),
+            'connection_head': self.connection_head.module.state_dict(),
+            'epoch': epoch,
+            'loss': avg_loss,
+            'config': self.cfg
+        }, checkpoint_path)
+        def quoted_presenter(dumper, data):
+            return dumper.represent_scalar('tag:yaml.org,2002:str', data, style='"')
+        yaml.add_representer(str, quoted_presenter)
+        with open(config_path, 'w') as f:
+            yaml.dump(self.cfg, f)
+    def load_config(self, config_path):
+        with open(config_path) as f:
+            self.cfg = yaml.safe_load(f)
+        # Extract and convert flags for formatting
+        add_block_embed_flag = "True" if self.cfg['model']['add_block_embed'] else "False"
+        using_attn_flag = "True" if self.cfg['model']['using_attn'] else "False"
+        dataset_name = os.path.basename(self.cfg['dataset']['path'])
+        self.save_dir = self.cfg['experiment']['save_dir'].format(
+            dataset_name=dataset_name,
+            n_train_samples=self.cfg['dataset']['n_train_samples'],
+            multires=self.cfg['model']['multires'],
+            add_block_embed=add_block_embed_flag,
+            using_attn=using_attn_flag,
+            batch_size=self.cfg['training']['batch_size'],
+        )
+        if self.is_master:
+            os.makedirs(self.save_dir, exist_ok=True)
+        dist.barrier()
+    def init_device(self):
+        self.device = torch.device(f"cuda:{self.local_rank}")
+    def init_dirs(self):
+        self.log_file = os.path.join(self.save_dir, f"training_log_{self.cfg['training']['lr']}.txt")
+        if self.is_master:
+            with open(self.log_file, "a") as f:
+                current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+                f.write(f"[{current_time}] Config loaded for distributed training with world size {self.world_size}\n")
+    def init_components(self):
+        self.dataset = VoxelVertexDataset_edge(
+            root_dir=self.cfg['dataset']['path'],
+            base_resolution=self.cfg['dataset']['base_resolution'],
+            min_resolution=self.cfg['dataset']['min_resolution'],
+            cache_dir=self.cfg['dataset']['cache_dir'],
+            renders_dir=self.cfg['dataset']['renders_dir'],
+            filter_active_voxels=self.cfg['dataset']['filter_active_voxels'],
+            cache_filter_path=self.cfg['dataset']['cache_filter_path'],
+            active_voxel_res=128,
+            pc_sample_number=819200,
+            sample_type=self.cfg['dataset']['sample_type'],
+        )
+        self.sampler = DistributedSampler(
+            self.dataset,
+            num_replicas=self.world_size,
+            rank=self.rank,
+            shuffle=True,
+        )
+        self.dataloader = DataLoader(
+            self.dataset,
+            batch_size=self.cfg['training']['batch_size'],
+            shuffle=False,
+            collate_fn=partial(collate_fn_pointnet,),
+            num_workers=self.cfg['training']['num_workers'],
+            pin_memory=True,
+            sampler=self.sampler,
+            prefetch_factor=4,
+            persistent_workers=True,
+        )
+        self.voxel_encoder = VoxelFeatureEncoder_active_pointnet(
+            in_channels=15,
+            hidden_dim=256,
+            out_channels=1024,
+            scatter_type='mean',
+            n_blocks=5,
+            resolution=128,
+            add_label=False,
+        ).to(self.device)
+        self.connection_head = ConnectionHead(
+            channels=32 * 2,
+            out_channels=1,
+            mlp_ratio=16,
+        ).to(self.device)
+        # ablation 3: voxelvae_1volume, have tested
+        self.vae = VoxelVAE(
+            in_channels=self.cfg['model']['in_channels'],
+            latent_dim=self.cfg['model']['latent_dim'],
+            encoder_blocks=self.cfg['model']['encoder_blocks'],
+            decoder_blocks_vtx=self.cfg['model']['decoder_blocks_vtx'],
+            decoder_blocks_edge=self.cfg['model']['decoder_blocks_edge'],
+            num_heads=8,
+            num_head_channels=64,
+            mlp_ratio=4.0,
+            attn_mode="swin",
+            window_size=8,
+            pe_mode="ape",
+            use_fp16=False,
+            use_checkpoint=True,
+            qk_rms_norm=False,
+            using_subdivide=True,
+            using_attn=self.cfg['model']['using_attn'],
+            attn_first=self.cfg['model'].get('attn_first', True),
+            pred_direction=self.cfg['model'].get('pred_direction', False),
+        ).to(self.device)
+        if self.cfg['training']['from_pretrained']:
+            load_pretrained_woself(
+                checkpoint_path=self.cfg['training']['checkpoint_path'],
+                voxel_encoder=self.voxel_encoder,
+                vae=self.vae,
+                connection_head=self.connection_head,
+                optimizer=None,
+            )
+        self.voxel_encoder = DDP(self.voxel_encoder, device_ids=[self.local_rank], find_unused_parameters=False)
+        self.connection_head = DDP(self.connection_head, device_ids=[self.local_rank], find_unused_parameters=False)
+        self.vae = DDP(self.vae, device_ids=[self.local_rank], find_unused_parameters=True)
+    def init_training(self):
+        self.optimizer = AdamW(
+            list(self.vae.module.parameters()) +
+            list(self.voxel_encoder.module.parameters()) +
+            list(self.connection_head.module.parameters()),
+            lr=self.cfg['training']['lr'],
+            weight_decay=0.01,
+        )
+        num_update_steps_per_epoch = math.ceil(len(self.dataloader) / self.accum_steps)
+        max_epochs = self.cfg['training']['max_epochs']
+        num_training_steps = max_epochs * num_update_steps_per_epoch
+        num_warmup_steps = 400
+        self.scheduler = get_cosine_schedule_with_warmup(
+            self.optimizer,
+            num_warmup_steps=num_warmup_steps,
+            num_training_steps=num_training_steps
+        )
+        self.focal_loss = AdaptiveFocalLoss(gamma=2.0, max_alpha=10.0).to(self.device)
+        self.mse_loss = nn.MSELoss(reduction='mean').to(self.device)
+        self.asyloss = AsymmetricFocalLoss(
+            gamma_pos=0.0,
+            gamma_neg=4.0,
+            clip=0.05,
+        )
+        self.bce_loss = torch.nn.BCEWithLogitsLoss()
+        self.dice_loss = DiceLoss()
+        self.scaler = GradScaler()
+    def train_step(self, batch, b_idx):
+        """Modified training step that handles vertex and edge voxels separately after initial prediction."""
+        # 1. Retrieve data from batch
+        combined_voxels_1024 = batch['combined_voxels_1024'].to(self.device)
+        combined_voxel_labels_1024 = batch['combined_voxel_labels_1024'].to(self.device)
+        gt_vertex_voxels_1024 = batch['gt_vertex_voxels_1024'].to(self.device)
+        # gt_edge_voxels_1024 = batch['gt_edge_voxels_1024'].to(self.device)
+        # gt_combined_endpoints_1024 = batch['gt_combined_endpoints_1024'].to(self.device)
+        # gt_combined_errors_1024 = batch['gt_combined_errors_1024'].to(self.device)
+        # gt_edges = batch['gt_vertex_edge_indices_256'].to(self.device)
+        gt_edges = batch['gt_vertex_edge_indices_1024'].to(self.device)
+        edge_mask = (combined_voxel_labels_1024 == 1)
+        # gt_edge_endpoints_1024 = gt_combined_endpoints_1024[edge_mask]
+        # gt_edge_errors_1024 = gt_combined_errors_1024[edge_mask]
+        # p1 = gt_edge_endpoints_1024[:, 1:4].float()
+        # p2 = gt_edge_endpoints_1024[:, 4:7].float()
+        # mask = ( (p1[:,0] < p2[:,0]) |
+        #         ((p1[:,0] == p2[:,0]) & (p1[:,1] < p2[:,1])) |
+        #         ((p1[:,0] == p2[:,0]) & (p1[:,1] == p2[:,1]) & (p1[:,2] <= p2[:,2])) )
+        # pA = torch.where(mask[:, None], p1, p2)  # smaller one
+        # pB = torch.where(mask[:, None], p2, p1)  # larger one
+        # d = pB - pA
+        # dir_gt = F.normalize(d, dim=-1, eps=1e-6)
+        vtx_128 = downsample_voxels(gt_vertex_voxels_1024, input_resolution=1024, output_resolution=128)
+        vtx_256 = downsample_voxels(gt_vertex_voxels_1024, input_resolution=1024, output_resolution=256)
+        vtx_512 = downsample_voxels(gt_vertex_voxels_1024, input_resolution=1024, output_resolution=512)
+        vtx_1024 = gt_vertex_voxels_1024
+        edge_128 = downsample_voxels(combined_voxels_1024, input_resolution=1024, output_resolution=128)
+        edge_256 = downsample_voxels(combined_voxels_1024, input_resolution=1024, output_resolution=256)
+        edge_512 = downsample_voxels(combined_voxels_1024, input_resolution=1024, output_resolution=512)
+        edge_1024 = combined_voxels_1024
+        active_coords = batch['active_voxels_128'].to(self.device)
+        point_cloud = batch['point_cloud_128'].to(self.device)
+        with autocast(dtype=torch.bfloat16):
+            active_voxel_feats = self.voxel_encoder(
+                p=point_cloud,
+                sparse_coords=active_coords,
+                res=128,
+                bbox_size=(-0.5, 0.5),
+            )
+            sparse_input = SparseTensor(
+                feats=active_voxel_feats,
+                coords=active_coords.int()
+            )
+            gt_edge_voxels_list = [
+                edge_128,
+                edge_256,
+                edge_512,
+                edge_1024,
+            ]
+            gt_vertex_voxels_list = [
+                vtx_128,
+                vtx_256,
+                vtx_512,
+                vtx_1024,
+            ]
+            results, posterior, latent_128 = self.vae(
+                sparse_input,
+                gt_vertex_voxels_list=gt_vertex_voxels_list,
+                gt_edge_voxels_list=gt_edge_voxels_list,
+                training=True,
+                sample_ratio=0.,
+            )
+            total_loss = 0.
+            prune_loss_total = 0.
+            vertex_loss_total = 0.
+            edge_loss_total=0.
+            initial_result = results[0]
+            vertex_mask = initial_result['vertex_mask']
+            vtx_logits = initial_result['vtx_feats']
+            vertex_loss = self.asyloss(vtx_logits.squeeze(-1), vertex_mask.float())
+            edge_mask = initial_result['edge_mask']
+            edge_logits = initial_result['edge_feats']
+            edge_loss = self.asyloss(edge_logits.squeeze(-1), edge_mask.float())
+            # edge_loss = self.bce_loss(edge_logits.squeeze(-1), edge_mask.float())
+            vertex_loss_total += vertex_loss
+            edge_loss_total += edge_loss
+            total_loss += vertex_loss
+            total_loss += edge_loss
+            # Process each level's results
+            for idx, res_dict in enumerate(results[1:], start=1):
+                # Vertex branch losses
+                vertex_pred_coords = res_dict['vertex']['occ_coords']
+                vertex_occ_probs = res_dict['vertex']['occ_probs']
+                vertex_gt_coords = res_dict['vertex']['coords']
+                vertex_labels = fast_isin(vertex_pred_coords, vertex_gt_coords, resolution=1024).float()
+                # print('vertex_labels.sum()', vertex_labels.sum(), idx)
+                vertex_logits = vertex_occ_probs.squeeze()
+                # if vertex_labels.sum() > 0 and vertex_labels.sum() < len(vertex_labels):
+                vertex_prune_loss = self.focal_loss(vertex_logits, vertex_labels)
+                prune_loss_total += vertex_prune_loss
+                total_loss += vertex_prune_loss
+                # Edge branch losses
+                edge_pred_coords = res_dict['edge']['occ_coords']
+                edge_occ_probs = res_dict['edge']['occ_probs']
+                edge_gt_coords = res_dict['edge']['coords']
+                edge_labels = fast_isin(edge_pred_coords, edge_gt_coords, resolution=1024).float()
+                edge_logits = edge_occ_probs.squeeze()
+                edge_prune_loss = self.focal_loss(edge_logits, edge_labels)
+                prune_loss_total += edge_prune_loss
+                total_loss += edge_prune_loss
+                if idx == 3:
+                    mse_loss_feats = torch.tensor(0., device=self.device)
+                    mse_loss_dirs = torch.tensor(0., device=self.device)
+                    # connection_loss = torch.tensor(0., device=self.device)
+                    # --- Vertex Branch (Connection Loss 核心) ---
+                    vtx_pred_coords = res_dict['vertex']['coords_4d']  # [N, 4]
+                    vtx_pred_feats  = res_dict['vertex']['feats']      # [N, C]
+                    # 1.1 排序 (既用于匹配 GT，也用于快速寻找空间邻居)
+                    vtx_pred_keys = flatten_coords_4d(vtx_pred_coords)
+                    vtx_pred_keys_sorted, vtx_pred_order = torch.sort(vtx_pred_keys)
+                    # 1.2 匹配 GT
+                    vtx_gt_keys = flatten_coords_4d(gt_vertex_voxels_1024.to(self.device))
+                    vtx_pos = torch.searchsorted(vtx_pred_keys_sorted, vtx_gt_keys)
+                    vtx_pos = vtx_pos.clamp(max=len(vtx_pred_keys_sorted) - 1)
+                    vtx_match_mask = (vtx_pred_keys_sorted[vtx_pos] == vtx_gt_keys)
+                    gt_to_pred_mapping = torch.full((len(vtx_gt_keys),), -1, device=self.device, dtype=torch.long)
+                    matched_pred_indices = vtx_pred_order[vtx_pos[vtx_match_mask]]
+                    gt_to_pred_mapping[vtx_match_mask] = matched_pred_indices
+                    # ====================================================
+                    # 2. 构建核心数据：正样本 Hash 集合
+                    # ====================================================
+                    # 这里的 pos_u/pos_v 仅用于构建 "什么是真连接" 的查询表
+                    u_gt, v_gt = gt_edges[:, 0], gt_edges[:, 1]
+                    u_pred = gt_to_pred_mapping[u_gt]
+                    v_pred = gt_to_pred_mapping[v_gt]
+                    valid_edge_mask = (u_pred != -1) & (v_pred != -1)
+                    real_pos_u = u_pred[valid_edge_mask]
+                    real_pos_v = v_pred[valid_edge_mask]
+                    num_real_pos = real_pos_u.shape[0]
+                    num_total_nodes = vtx_pred_coords.shape[0]
+                    if num_real_pos > 0:
+                        # 2. 构建候选样本 (Candidate Generation)
+                        # ====================================================
+                        cand_u_list = []
+                        cand_v_list = []
+                        batch_ids = vtx_pred_coords[:, 0]
+                        unique_batches = torch.unique(batch_ids)
+                        RADIUS = 64
+                        MAX_PTS_FOR_DIST = 12000
+                        K_RANDOM = 32
+                        for b_id in unique_batches:
+                            mask_b = (batch_ids == b_id)
+                            indices_b = torch.nonzero(mask_b).squeeze(-1) # Global indices
+                            coords_b = vtx_pred_coords[mask_b, 1:4].float() # (x,y,z)
+                            num_b = coords_b.shape[0]
+                            if num_b < 2: continue
+                            # --- A. Radius Graph (Hard Negatives) ---
+                            if num_b <= MAX_PTS_FOR_DIST:
+                                # 计算距离矩阵 [M, M]
+                                # 注意：autocast 下 float16 的 cdist 可能精度不够，建议转 float32
+                                dist_mat = torch.cdist(coords_b.float(), coords_b.float())
+                                # 找到距离小于 Radius 的点对 (排除自环)
+                                adj_mat = (dist_mat < RADIUS) & (dist_mat > 1e-6)
+                                # 提取索引 (local indices in batch)
+                                src_local, dst_local = torch.nonzero(adj_mat, as_tuple=True)
+                                # 映射回全局索引
+                                cand_u_list.append(indices_b[src_local])
+                                cand_v_list.append(indices_b[dst_local])
+                            else:
+                                print('num_b is big!')
+                                pass
+                            # --- B. Random Sampling (Easy Negatives) ---
+                            # 随机生成 num_b * K 对
+                            n_rand = num_b * K_RANDOM
+                            rand_src_local = torch.randint(0, num_b, (n_rand,), device=self.device)
+                            rand_dst_local = torch.randint(0, num_b, (n_rand,), device=self.device)
+                            # 映射回全局索引
+                            cand_u_list.append(indices_b[rand_src_local])
+                            cand_v_list.append(indices_b[rand_dst_local])
+                        # 合并所有来源 (GT + Radius + Random)
+                        # 注意：我们把 real_pos 也加进来，确保正样本一定在列表里
+                        all_u = torch.cat([real_pos_u] + cand_u_list)
+                        all_v = torch.cat([real_pos_v] + cand_v_list)
+                        # 3. 去重与 Labeling (Deduplication & Labeling)
+                        # ====================================================
+                        # 构造无向边 Hash: min * N + max
+                        # 确保 MAX_NODES 足够大，比如 1000000 或 num_total_nodes
+                        HASH_BASE = num_total_nodes + 100
+                        p_min = torch.min(all_u, all_v)
+                        p_max = torch.max(all_u, all_v)
+                        # 过滤掉自环 (u==v)
+                        valid_pair = (p_min != p_max)
+                        p_min = p_min[valid_pair]
+                        p_max = p_max[valid_pair]
+                        all_hashes = p_min.long() * HASH_BASE + p_max.long()
+                        # --- 核心：去重 ---
+                        unique_hashes = torch.unique(all_hashes)
+                        # 解码回 u, v
+                        final_u = unique_hashes // HASH_BASE
+                        final_v = unique_hashes % HASH_BASE
+                        # --- Labeling ---
+                        # 构建 GT 的 Hash 表用于查询
+                        gt_min = torch.min(real_pos_u, real_pos_v)
+                        gt_max = torch.max(real_pos_u, real_pos_v)
+                        gt_hashes = gt_min.long() * HASH_BASE + gt_max.long()
+                        gt_hashes = torch.unique(gt_hashes) # GT 也去重一下保险
+                        gt_hashes_sorted, _ = torch.sort(gt_hashes)
+                        # 查询 unique_hashes 是否在 gt_hashes 中
+                        # 使用 searchsorted
+                        idx_search = torch.searchsorted(gt_hashes_sorted, unique_hashes)
+                        idx_search = idx_search.clamp(max=len(gt_hashes_sorted) - 1)
+                        is_connected = (gt_hashes_sorted[idx_search] == unique_hashes)
+                        targets = is_connected.float().unsqueeze(-1) # [N_pairs, 1]
+                        # ============================================================
+                        # 4. 前向传播与 Loss
+                        # ====================================================
+                        feat_u = vtx_pred_feats[final_u]
+                        feat_v = vtx_pred_feats[final_v]
+                        # 对称特征融合
+                        feat_uv = torch.cat([feat_u, feat_v], dim=-1)
+                        feat_vu = torch.cat([feat_v, feat_u], dim=-1)
+                        logits_uv = self.connection_head(feat_uv)
+                        logits_vu = self.connection_head(feat_vu)
+                        logits = (logits_uv + logits_vu) / 2.
+                        connection_loss = self.asyloss(logits, targets)
+                        total_loss += connection_loss
+                    else:
+                        connection_loss = torch.tensor(0., device=self.device)
+                # KL loss
+                kl_loss = posterior.kl(dims=(1,)).mean() * 1e-3 # 1e-3 before
+                total_loss += kl_loss
+        # Backpropagation
+        scaled_total_loss = total_loss / self.accum_steps
+        # self.scaler.scale(scaled_total_loss).backward()
+        scaled_total_loss.backward()
+        return {
+            'total_loss': total_loss.item(),
+            'kl_loss': kl_loss.item(),
+            'prune_loss': prune_loss_total.item(),
+            'vertex_loss': vertex_loss_total.item(),
+            'edge_loss': edge_loss_total.item(),
+            'offset_loss': mse_loss_feats.item(),
+            'direction_loss': mse_loss_dirs.item(),
+            'connection_loss': connection_loss.item(),
+        }
+    def train(self):
+        accum_steps = self.accum_steps
+        for epoch in range(self.cfg['training']['start_epoch'], self.cfg['training']['max_epochs']):
+            self.dataloader.sampler.set_epoch(epoch)
+            # Initialize metrics
+            metrics = {
+                'total_loss': 0.0,
+                'kl_loss': 0.0,
+                'prune_loss': 0.0,
+                'vertex_loss': 0.0,
+                'edge_loss': 0.0,
+                'offset_loss': 0.0,
+                'direction_loss': 0.0,
+                'connection_loss': 0.0,
+            }
+            num_batches = 0
+            self.optimizer.zero_grad(set_to_none=True)
+            for i, batch in enumerate(self.dataloader):
+                # Get all losses from train_step
+                if batch is None:
+                    continue
+                step_losses = self.train_step(batch, i)
+                # Accumulate losses
+                for key in metrics:
+                    metrics[key] += step_losses[key]
+                num_batches += 1
+                if (i + 1) % accum_steps == 0:
+                    # self.scaler.unscale_(self.optimizer)
+                    # torch.nn.utils.clip_grad_norm_(self.vae.parameters(), max_norm=0.5)
+                    # torch.nn.utils.clip_grad_norm_(self.voxel_encoder.parameters(), max_norm=0.5)
+                    # torch.nn.utils.clip_grad_norm_(self.connection_head.parameters(), max_norm=0.5)
+                    # self.scaler.step(self.optimizer)
+                    # self.scaler.update()
+                    # self.optimizer.zero_grad(set_to_none=True)
+                    # self.scheduler.step()
+                    torch.nn.utils.clip_grad_norm_(self.vae.parameters(), max_norm=0.5)
+                    torch.nn.utils.clip_grad_norm_(self.voxel_encoder.parameters(), max_norm=0.5)
+                    torch.nn.utils.clip_grad_norm_(self.connection_head.parameters(), max_norm=0.5)
+                    self.optimizer.step()
+                    self.optimizer.zero_grad(set_to_none=True)
+                    self.scheduler.step()
+                # Print batch-level metrics
+                if self.is_master:
+                    avg_metric = {key: value / num_batches for key, value in metrics.items()}
+                    print(
+                        f"[Epoch {epoch}] Batch:{num_batches} "
+                        f"AvgL:{avg_metric['total_loss']:.4f} "
+                        f"Loss: {step_losses['total_loss']:.4f}, "
+                        f"KLL: {step_losses['kl_loss']:.4f}, "
+                        f"PruneL: {step_losses['prune_loss']:.4f}, "
+                        f"VertexL: {step_losses['vertex_loss']:.4f}, "
+                        f"EdgeL: {step_losses['edge_loss']:.4f}, "
+                        f"OffsetL: {step_losses['offset_loss']:.4f}, "
+                        f"DireL: {step_losses['direction_loss']:.4f}, "
+                        f"ConL: {step_losses['connection_loss']:.4f}, "
+                        f"LR: {self.optimizer.param_groups[0]['lr']:.4e} "
+                    )
+                    if i % 2000 == 0 and i != 0:
+                        self.save_checkpoint(epoch, avg_metric['total_loss'], i)
+                        with open(self.log_file, "a") as f:
+                            current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+                            log_line = (
+                                f"Epoch {epoch:05d} | "
+                                f"Batch {i:05d} | "
+                                f"Loss: {avg_metric['total_loss']:.6f} "
+                                f"Avg KLL: {avg_metric['kl_loss']:.4f} "
+                                f"Avg PruneL: {avg_metric['prune_loss']:.4f} "
+                                f"Avg VertexL: {avg_metric['vertex_loss']:.4f} "
+                                f"Avg EdgeL: {avg_metric['edge_loss']:.4f} "
+                                f"Avg OffsetL: {avg_metric['offset_loss']:.4f} "
+                                f"Avg DireL: {avg_metric['direction_loss']:.4f} "
+                                f"Avg ConL: {avg_metric['connection_loss']:.4f} "
+                                f"LR: {self.optimizer.param_groups[0]['lr']:.4e} "
+                                f"[{current_time}]\n"
+                            )
+                            f.write(log_line)
+            if num_batches % accum_steps != 0:
+                # self.scaler.unscale_(self.optimizer)
+                # torch.nn.utils.clip_grad_norm_(self.vae.parameters(), max_norm=0.5)
+                # torch.nn.utils.clip_grad_norm_(self.voxel_encoder.parameters(), max_norm=0.5)
+                # torch.nn.utils.clip_grad_norm_(self.connection_head.parameters(), max_norm=0.5)
+                # self.scaler.step(self.optimizer)
+                # self.scaler.update()
+                # self.optimizer.zero_grad(set_to_none=True)
+                # self.scheduler.step()
+                torch.nn.utils.clip_grad_norm_(self.vae.parameters(), max_norm=0.5)
+                torch.nn.utils.clip_grad_norm_(self.voxel_encoder.parameters(), max_norm=0.5)
+                torch.nn.utils.clip_grad_norm_(self.connection_head.parameters(), max_norm=0.5)
+                self.optimizer.step()
+                self.optimizer.zero_grad(set_to_none=True)
+                self.scheduler.step()
+            # Calculate epoch averages
+            avg_metrics = {key: value / num_batches for key, value in metrics.items()}
+            self.train_loss_history.append(avg_metrics['total_loss'])
+            # Log to file
+            if self.is_master:
+                with open(self.log_file, "a") as f:
+                    current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+                    log_line = (
+                        f"Epoch {epoch:05d} | "
+                        f"Loss: {avg_metrics['total_loss']:.6f} "
+                        f"Avg KLL: {avg_metrics['kl_loss']:.4f} "
+                        f"Avg PruneL: {avg_metrics['prune_loss']:.4f} "
+                        f"Avg VertexL: {avg_metrics['vertex_loss']:.4f} "
+                        f"Avg EdgeL: {avg_metrics['edge_loss']:.4f} "
+                        f"Avg OffsetL: {avg_metrics['offset_loss']:.4f} "
+                        f"Avg DireL: {avg_metrics['direction_loss']:.4f} "
+                        f"Avg ConL: {avg_metrics['connection_loss']:.4f} "
+                        f"LR: {self.optimizer.param_groups[0]['lr']:.4e} "
+                        f"[{current_time}]\n"
+                    )
+                    f.write(log_line)
+                # Print epoch summary
+                print(
+                    f"[Epoch {epoch}] "
+                    f"Avg Loss: {avg_metrics['total_loss']:.4f} "
+                    f"Avg KLL: {avg_metrics['kl_loss']:.4f} "
+                    f"Avg PruneL: {avg_metrics['prune_loss']:.4f} "
+                    f"Avg VertexL: {avg_metrics['vertex_loss']:.4f} "
+                    f"Avg EdgeL: {avg_metrics['edge_loss']:.4f} "
+                    f"Avg OffsetL: {avg_metrics['offset_loss']:.4f} "
+                    f"Avg DireL: {avg_metrics['direction_loss']:.4f} "
+                    f"Avg ConL: {avg_metrics['connection_loss']:.4f} "
+                    f"[{current_time}]\n"
+                )
+                # Save checkpoint
+                if epoch % self.cfg['training']['save_every'] == 0:
+                    self.save_checkpoint(epoch, avg_metrics['total_loss'], i)
+            # Update learning rate
+            if self.is_master:
+                current_lr = self.optimizer.param_groups[0]['lr']
+                print(f"Epoch {epoch}: Learning rate updated to {current_lr:.2e}")
+            dist.barrier()
+def main():
+    # Initialize the process group
+    dist.init_process_group(backend='nccl')
+    # Get rank and world size from environment variables set by the launcher
+    rank = int(os.environ['RANK'])
+    world_size = int(os.environ['WORLD_SIZE'])
+    local_rank = int(os.environ['LOCAL_RANK'])
+    # Set the device for the current process. This is crucial.
+    torch.cuda.set_device(local_rank)
+    torch.manual_seed(42+rank)
+    # with torch.cuda.amp.autocast(dtype=torch.bfloat16):
+    # Pass the distributed info to the Trainer
+    trainer = Trainer(
+        config_path="/gemini/user/private/zhaotianhao/Triposf/config_edge_1024_error_8enc_8dec_woself_finetune_128to1024_addhead.yaml",
+        rank=rank,
+        world_size=world_size,
+        local_rank=local_rank
+    )
+    trainer.train()
+    # Clean up the process group
+    dist.destroy_process_group()
+# Run the distributed training entry point only when this file is executed as a script.
+if __name__ == '__main__':
+    main()

train_slat_vae_512_128to1024_pointnet_head.py ADDED Viewed

	@@ -0,0 +1,930 @@

+import torch.nn as nn
+import os
+# os.environ['ATTN_BACKEND'] = 'xformers'
+import yaml
+import torch
+import time
+from datetime import datetime
+from torch.utils.data import DataLoader
+from functools import partial
+from triposf.modules.sparse.basic import SparseTensor
+import torch.nn.functional as F
+from torch.optim import AdamW
+from torch.cuda.amp import GradScaler, autocast
+from triposf.models.triposf_vae.VoxelFeatureVAE_edge_woself_128to1024_decoder_head import VoxelVAE
+from vertex_encoder import VoxelFeatureEncoder_active_pointnet, ConnectionHead
+from dataset_triposf_head import VoxelVertexDataset_edge, collate_fn_pointnet
+from utils import load_pretrained_woself, AdaptiveFocalLoss, fast_isin, AsymmetricFocalLoss, DiceLoss, FocalLoss
+import torch.distributed as dist
+from torch.nn.parallel import DistributedDataParallel as DDP
+from torch.utils.data.distributed import DistributedSampler
+from transformers import get_cosine_schedule_with_warmup
+import math
+from torchvision.ops import sigmoid_focal_loss
+import numpy as np
+import open3d as o3d
+def export_sampled_edges(coords, u, v, labels, step_idx, save_dir="debug_viz", batch_idx_to_viz=0):
+    """
+    导出采样边为 PLY 文件。
+    Args:
+        coords: [N, 4] Tensor (batch_idx, x, y, z)
+        u: [E] Tensor, 起点索引 (global index)
+        v: [E] Tensor, 终点索引 (global index)
+        labels: [E, 1] Tensor, 1.0 为正样本, 0.0 为负样本
+        step_idx: 当前步数或 epoch，用于文件名
+        save_dir: 保存目录
+        batch_idx_to_viz: 只可视化哪个 batch 的数据 (防止多个 batch 叠加在一起看不清)
+    """
+    os.makedirs(save_dir, exist_ok=True)
+    # 1. 转为 CPU numpy
+    coords_np = coords.detach().cpu().numpy()
+    u_np = u.detach().cpu().numpy()
+    v_np = v.detach().cpu().numpy()
+    labels_np = labels.detach().cpu().numpy().reshape(-1)
+    # 2. 筛选特定 Batch (通常只看 Batch 0 比较清晰)
+    # coords 的第0列是 batch index
+    batch_mask = (coords_np[:, 0] == batch_idx_to_viz)
+    # 获取属于该 batch 的全局索引范围
+    # 注意：u 和 v 是针对所有 coords 的全局索引。
+    # 我们需要判断一条边的两个端点是否都在这个 batch 内。
+    # 快速检查端点是否在当前 batch
+    valid_u_in_batch = batch_mask[u_np]
+    valid_v_in_batch = batch_mask[v_np]
+    edge_batch_mask = valid_u_in_batch & valid_v_in_batch
+    if edge_batch_mask.sum() == 0:
+        print(f"Warning: No edges found for batch {batch_idx_to_viz}")
+        return
+    # 应用 Batch 筛选
+    u_b = u_np[edge_batch_mask]
+    v_b = v_np[edge_batch_mask]
+    labels_b = labels_np[edge_batch_mask]
+    # 3. 提取该 Batch 的顶点坐标 (去掉 batch_idx 维度)
+    # 此时我们需要重新映射 u, v 的索引，因为我们要只保存该 batch 的点
+    batch_indices_global = np.where(batch_mask)[0]
+    # 创建全局索引到局部索引的映射表
+    global_to_local = {gid: lid for lid, gid in enumerate(batch_indices_global)}
+    points_xyz = coords_np[batch_indices_global, 1:4] # [M, 3]
+    # 转换 u, v 为局部索引
+    try:
+        u_local = np.array([global_to_local[idx] for idx in u_b])
+        v_local = np.array([global_to_local[idx] for idx in v_b])
+    except KeyError:
+        print("Error in index mapping. Edge endpoints might cross batches.")
+        return
+    # 4. 分离正负样本
+    pos_mask = labels_b > 0.5
+    neg_mask = ~pos_mask
+    # 内部函数：写 PLY
+    def write_ply(filename, points, edges_u, edges_v, color_rgb):
+        num_verts = len(points)
+        num_edges = len(edges_u)
+        with open(filename, 'w') as f:
+            f.write("ply\n")
+            f.write("format ascii 1.0\n")
+            f.write(f"element vertex {num_verts}\n")
+            f.write("property float x\n")
+            f.write("property float y\n")
+            f.write("property float z\n")
+            f.write("property uchar red\n")
+            f.write("property uchar green\n")
+            f.write("property uchar blue\n")
+            f.write(f"element edge {num_edges}\n")
+            f.write("property int vertex1\n")
+            f.write("property int vertex2\n")
+            f.write("end_header\n")
+            # Write Vertices with Color
+            # 为了让可视化更清楚，我们将所有点染成指定颜色
+            for i in range(num_verts):
+                x, y, z = points[i]
+                f.write(f"{x:.4f} {y:.4f} {z:.4f} {color_rgb[0]} {color_rgb[1]} {color_rgb[2]}\n")
+            # Write Edges
+            for i in range(num_edges):
+                f.write(f"{edges_u[i]} {edges_v[i]}\n")
+        print(f"Saved: {filename} (Edges: {num_edges})")
+    # 5. 保存正样本 (绿色)
+    if pos_mask.sum() > 0:
+        write_ply(
+            os.path.join(save_dir, f"step_{step_idx}_pos_edges.ply"),
+            points_xyz, # 使用所有点，或者优化为只使用涉及的点（这里为了坐标统一简单起见使用所有点）
+            u_local[pos_mask],
+            v_local[pos_mask],
+            color_rgb=(0, 255, 0) # Green
+        )
+    # 6. 保存负样本 (红色)
+    if neg_mask.sum() > 0:
+        # 为了避免文件太大或太乱，如果负样本特别多，可以考虑随机采样一部分保存
+        # 这里默认全部保存
+        write_ply(
+            os.path.join(save_dir, f"step_{step_idx}_neg_edges.ply"),
+            points_xyz,
+            u_local[neg_mask],
+            v_local[neg_mask],
+            color_rgb=(255, 0, 0) # Red
+        )
+def flatten_coords_4d(coords_4d: torch.Tensor):
+    coords_4d_long = coords_4d.long()
+    base_x = 1024
+    base_y = 1024 * 1024
+    base_z = 1024 * 1024 * 1024
+    flat_coords = coords_4d_long[:, 0] * base_z + \
+                  coords_4d_long[:, 1] * base_y + \
+                  coords_4d_long[:, 2] * base_x + \
+                  coords_4d_long[:, 3]
+    return flat_coords
+def downsample_voxels(
+    voxels: torch.Tensor,
+    input_resolution: int,
+    output_resolution: int
+) -> torch.Tensor:
+    if input_resolution % output_resolution != 0:
+        raise ValueError(f"input_resolution ({input_resolution}) must be divisible "
+                         f"by output_resolution ({output_resolution}).")
+    factor = input_resolution // output_resolution
+    downsampled_voxels = voxels.clone().to(torch.long)
+    downsampled_voxels[:, 1:] //= factor
+    unique_downsampled_voxels = torch.unique(downsampled_voxels, dim=0)
+    return unique_downsampled_voxels
+class Trainer:
+    def __init__(self, config_path, rank, world_size, local_rank):
+        self.rank = rank
+        self.world_size = world_size
+        self.local_rank = local_rank
+        self.is_master = self.rank == 0
+        self.load_config(config_path)
+        self.accum_steps = max(1, 4 // self.cfg['training']['batch_size'])
+        self.config_hash = self.save_config_with_hash()
+        self.init_device()
+        self.init_dirs()
+        self.init_components()
+        self.init_training()
+        self.train_loss_history = []
+        self.eval_loss_history = []
+        self.best_eval_loss = float('inf')
+    def save_config_with_hash(self):
+        import hashlib
+        # Serialize config to hash
+        config_str = yaml.dump(self.cfg)
+        config_hash = hashlib.md5(config_str.encode()).hexdigest()[:8]
+        # Prepare all flags as string for formatting
+        add_block_embed_flag = "True" if self.cfg['model']['add_block_embed'] else "False"
+        using_attn_flag = "True" if self.cfg['model']['using_attn'] else "False"
+        dataset_name = os.path.basename(self.cfg['dataset']['path'])
+        # Format save_dir with all placeholders
+        self.cfg['experiment']['save_dir'] = self.cfg['experiment']['save_dir'].format(
+            dataset_name=dataset_name,
+            config_hash=config_hash,
+            n_train_samples=self.cfg['dataset']['n_train_samples'],
+            multires=self.cfg['model']['multires'],
+            add_block_embed=add_block_embed_flag,
+            using_attn=using_attn_flag,
+            batch_size=self.cfg['training']['batch_size'],
+        )
+        if self.is_master:
+            os.makedirs(self.save_dir, exist_ok=True)
+            config_path = os.path.join(self.save_dir, "config.yaml")
+            with open(config_path, 'w') as f:
+                yaml.dump(self.cfg, f)
+        dist.barrier()
+        return config_hash
+    def save_checkpoint(self, epoch, avg_loss, batch_idx):
+        if not self.is_master:
+            return
+        checkpoint_path = os.path.join(self.save_dir, f"checkpoint_epoch{epoch}_batch{batch_idx}_loss{avg_loss:.4f}.pt")
+        config_path = os.path.join(self.save_dir, "config.yaml")
+        torch.save({
+            'voxel_encoder': self.voxel_encoder.module.state_dict(),
+            'vae': self.vae.module.state_dict(),
+            'connection_head': self.connection_head.module.state_dict(),
+            'epoch': epoch,
+            'loss': avg_loss,
+            'config': self.cfg
+        }, checkpoint_path)
+        def quoted_presenter(dumper, data):
+            return dumper.represent_scalar('tag:yaml.org,2002:str', data, style='"')
+        yaml.add_representer(str, quoted_presenter)
+        with open(config_path, 'w') as f:
+            yaml.dump(self.cfg, f)
+    def load_config(self, config_path):
+        with open(config_path) as f:
+            self.cfg = yaml.safe_load(f)
+        # Extract and convert flags for formatting
+        add_block_embed_flag = "True" if self.cfg['model']['add_block_embed'] else "False"
+        using_attn_flag = "True" if self.cfg['model']['using_attn'] else "False"
+        dataset_name = os.path.basename(self.cfg['dataset']['path'])
+        self.save_dir = self.cfg['experiment']['save_dir'].format(
+            dataset_name=dataset_name,
+            n_train_samples=self.cfg['dataset']['n_train_samples'],
+            multires=self.cfg['model']['multires'],
+            add_block_embed=add_block_embed_flag,
+            using_attn=using_attn_flag,
+            batch_size=self.cfg['training']['batch_size'],
+        )
+        if self.is_master:
+            os.makedirs(self.save_dir, exist_ok=True)
+        dist.barrier()
+    def init_device(self):
+        self.device = torch.device(f"cuda:{self.local_rank}")
+    def init_dirs(self):
+        self.log_file = os.path.join(self.save_dir, f"training_log_{self.cfg['training']['lr']}.txt")
+        if self.is_master:
+            with open(self.log_file, "a") as f:
+                current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+                f.write(f"[{current_time}] Config loaded for distributed training with world size {self.world_size}\n")
+    def init_components(self):
+        self.dataset = VoxelVertexDataset_edge(
+            root_dir=self.cfg['dataset']['path'],
+            base_resolution=self.cfg['dataset']['base_resolution'],
+            min_resolution=self.cfg['dataset']['min_resolution'],
+            cache_dir=self.cfg['dataset']['cache_dir'],
+            renders_dir=self.cfg['dataset']['renders_dir'],
+            filter_active_voxels=self.cfg['dataset']['filter_active_voxels'],
+            cache_filter_path=self.cfg['dataset']['cache_filter_path'],
+            active_voxel_res=128,
+            pc_sample_number=819200,
+            sample_type=self.cfg['dataset']['sample_type'],
+        )
+        self.sampler = DistributedSampler(
+            self.dataset,
+            num_replicas=self.world_size,
+            rank=self.rank,
+            shuffle=True,
+        )
+        self.dataloader = DataLoader(
+            self.dataset,
+            batch_size=self.cfg['training']['batch_size'],
+            shuffle=False,
+            collate_fn=partial(collate_fn_pointnet,),
+            num_workers=self.cfg['training']['num_workers'],
+            pin_memory=True,
+            sampler=self.sampler,
+            prefetch_factor=4,
+            persistent_workers=True,
+        )
+        self.voxel_encoder = VoxelFeatureEncoder_active_pointnet(
+            in_channels=15,
+            hidden_dim=256,
+            out_channels=1024,
+            scatter_type='mean',
+            n_blocks=5,
+            resolution=128,
+            add_label=False,
+        ).to(self.device)
+        self.connection_head = ConnectionHead(
+            channels=32 * 2,
+            out_channels=1,
+            mlp_ratio=16,
+        ).to(self.device)
+        # self.connection_head = ConnectionHead(
+        #     channels=64 * 2,
+        #     out_channels=1,
+        #     mlp_ratio=8,
+        # ).to(self.device)
+        # ablation 3: voxelvae_1volume, have tested
+        self.vae = VoxelVAE(
+            in_channels=self.cfg['model']['in_channels'],
+            latent_dim=self.cfg['model']['latent_dim'],
+            encoder_blocks=self.cfg['model']['encoder_blocks'],
+            decoder_blocks_vtx=self.cfg['model']['decoder_blocks_vtx'],
+            decoder_blocks_edge=self.cfg['model']['decoder_blocks_edge'],
+            num_heads=8,
+            num_head_channels=64,
+            mlp_ratio=4.0,
+            attn_mode="swin",
+            window_size=8,
+            pe_mode="ape",
+            use_fp16=False,
+            use_checkpoint=True,
+            qk_rms_norm=False,
+            using_subdivide=True,
+            using_attn=self.cfg['model']['using_attn'],
+            attn_first=self.cfg['model'].get('attn_first', True),
+            pred_direction=self.cfg['model'].get('pred_direction', False),
+        ).to(self.device)
+        if self.cfg['training']['from_pretrained']:
+            load_pretrained_woself(
+                checkpoint_path=self.cfg['training']['checkpoint_path'],
+                voxel_encoder=self.voxel_encoder,
+                vae=self.vae,
+                connection_head=self.connection_head,
+                optimizer=None,
+            )
+        self.voxel_encoder = DDP(self.voxel_encoder, device_ids=[self.local_rank], find_unused_parameters=False)
+        self.connection_head = DDP(self.connection_head, device_ids=[self.local_rank], find_unused_parameters=False)
+        self.vae = DDP(self.vae, device_ids=[self.local_rank], find_unused_parameters=False)
+    def init_training(self):
+        self.optimizer = AdamW(
+            list(self.vae.module.parameters()) +
+            list(self.voxel_encoder.module.parameters()) +
+            list(self.connection_head.module.parameters()),
+            lr=self.cfg['training']['lr'],
+            weight_decay=0.01,
+        )
+        num_update_steps_per_epoch = math.ceil(len(self.dataloader) / self.accum_steps)
+        max_epochs = self.cfg['training']['max_epochs']
+        num_training_steps = max_epochs * num_update_steps_per_epoch
+        num_warmup_steps = 200
+        self.scheduler = get_cosine_schedule_with_warmup(
+            self.optimizer,
+            num_warmup_steps=num_warmup_steps,
+            num_training_steps=num_training_steps
+        )
+        self.focal_loss = AdaptiveFocalLoss(gamma=2.0, max_alpha=10.0).to(self.device)
+        # self.focal_loss = FocalLoss(gamma=2, alpha=0.6)
+        self.mse_loss = nn.MSELoss(reduction='mean').to(self.device)
+        self.asyloss = AsymmetricFocalLoss(
+            gamma_pos=0.0,
+            gamma_neg=4.0,
+            clip=0.05,
+        )
+        self.bce_loss = torch.nn.BCEWithLogitsLoss()
+        self.dice_loss = DiceLoss()
+        self.scaler = GradScaler()
+    def train_step(self, batch, b_idx):
+        """Modified training step that handles vertex and edge voxels separately after initial prediction."""
+        # 1. Retrieve data from batch
+        combined_voxels_1024 = batch['combined_voxels_1024'].to(self.device)
+        combined_voxel_labels_1024 = batch['combined_voxel_labels_1024'].to(self.device)
+        gt_vertex_voxels_1024 = batch['gt_vertex_voxels_1024'].to(self.device)
+        # gt_edge_voxels_1024 = batch['gt_edge_voxels_1024'].to(self.device)
+        # gt_combined_endpoints_1024 = batch['gt_combined_endpoints_1024'].to(self.device)
+        # gt_combined_errors_1024 = batch['gt_combined_errors_1024'].to(self.device)
+        # gt_edges = batch['gt_vertex_edge_indices_256'].to(self.device)
+        gt_edges = batch['gt_vertex_edge_indices_1024'].to(self.device)
+        edge_mask = (combined_voxel_labels_1024 == 1)
+        # gt_edge_endpoints_1024 = gt_combined_endpoints_1024[edge_mask]
+        # gt_edge_errors_1024 = gt_combined_errors_1024[edge_mask]
+        # p1 = gt_edge_endpoints_1024[:, 1:4].float()
+        # p2 = gt_edge_endpoints_1024[:, 4:7].float()
+        # mask = ( (p1[:,0] < p2[:,0]) |
+        #         ((p1[:,0] == p2[:,0]) & (p1[:,1] < p2[:,1])) |
+        #         ((p1[:,0] == p2[:,0]) & (p1[:,1] == p2[:,1]) & (p1[:,2] <= p2[:,2])) )
+        # pA = torch.where(mask[:, None], p1, p2)  # smaller one
+        # pB = torch.where(mask[:, None], p2, p1)  # larger one
+        # d = pB - pA
+        # dir_gt = F.normalize(d, dim=-1, eps=1e-6)
+        vtx_128 = downsample_voxels(gt_vertex_voxels_1024, input_resolution=1024, output_resolution=128)
+        vtx_256 = downsample_voxels(gt_vertex_voxels_1024, input_resolution=1024, output_resolution=256)
+        vtx_512 = downsample_voxels(gt_vertex_voxels_1024, input_resolution=1024, output_resolution=512)
+        vtx_1024 = gt_vertex_voxels_1024
+        edge_128 = downsample_voxels(combined_voxels_1024, input_resolution=1024, output_resolution=128)
+        edge_256 = downsample_voxels(combined_voxels_1024, input_resolution=1024, output_resolution=256)
+        edge_512 = downsample_voxels(combined_voxels_1024, input_resolution=1024, output_resolution=512)
+        edge_1024 = combined_voxels_1024
+        active_coords = batch['active_voxels_128'].to(self.device)
+        point_cloud = batch['point_cloud_128'].to(self.device)
+        with autocast(dtype=torch.bfloat16):
+            active_voxel_feats = self.voxel_encoder(
+                p=point_cloud,
+                sparse_coords=active_coords,
+                res=128,
+                bbox_size=(-0.5, 0.5),
+            )
+            sparse_input = SparseTensor(
+                feats=active_voxel_feats,
+                coords=active_coords.int()
+            )
+            gt_edge_voxels_list = [
+                edge_128,
+                edge_256,
+                edge_512,
+                edge_1024,
+            ]
+            gt_vertex_voxels_list = [
+                vtx_128,
+                vtx_256,
+                vtx_512,
+                vtx_1024,
+            ]
+            results, posterior, latent_128 = self.vae(
+                sparse_input,
+                gt_vertex_voxels_list=gt_vertex_voxels_list,
+                gt_edge_voxels_list=gt_edge_voxels_list,
+                training=True,
+                sample_ratio=0.,
+            )
+            # print("results[-1]['edge']['coords_4d'][1827:1830]", results[-1]['edge']['coords_4d'][1827:1830])
+            total_loss = 0.
+            prune_loss_total = 0.
+            vertex_loss_total = 0.
+            edge_loss_total=0.
+            initial_result = results[0]
+            vertex_mask = initial_result['vertex_mask']
+            vtx_logits = initial_result['vtx_feats']
+            vertex_loss = self.asyloss(vtx_logits.squeeze(-1), vertex_mask.float())
+            edge_mask = initial_result['edge_mask']
+            edge_logits = initial_result['edge_feats']
+            edge_loss = self.asyloss(edge_logits.squeeze(-1), edge_mask.float())
+            # edge_loss = self.bce_loss(edge_logits.squeeze(-1), edge_mask.float())
+            vertex_loss_total += vertex_loss
+            edge_loss_total += edge_loss
+            total_loss += vertex_loss
+            total_loss += edge_loss
+            # Process each level's results
+            for idx, res_dict in enumerate(results[1:], start=1):
+                # Vertex branch losses
+                vertex_pred_coords = res_dict['vertex']['occ_coords']
+                vertex_occ_probs = res_dict['vertex']['occ_probs']
+                vertex_gt_coords = res_dict['vertex']['coords']
+                vertex_labels = fast_isin(vertex_pred_coords, vertex_gt_coords, resolution=1024).float()
+                # print('vertex_labels.sum()', vertex_labels.sum(), idx)
+                vertex_logits = vertex_occ_probs.squeeze()
+                # if vertex_labels.sum() > 0 and vertex_labels.sum() < len(vertex_labels):
+                vertex_prune_loss = self.asyloss(vertex_logits, vertex_labels)
+                prune_loss_total += vertex_prune_loss
+                total_loss += vertex_prune_loss
+                # Edge branch losses
+                edge_pred_coords = res_dict['edge']['occ_coords']
+                edge_occ_probs = res_dict['edge']['occ_probs']
+                edge_gt_coords = res_dict['edge']['coords']
+                edge_labels = fast_isin(edge_pred_coords, edge_gt_coords, resolution=1024).float()
+                edge_logits = edge_occ_probs.squeeze()
+                edge_prune_loss = self.asyloss(edge_logits, edge_labels)
+                prune_loss_total += edge_prune_loss
+                total_loss += edge_prune_loss
+                if idx == 3:
+                    mse_loss_feats = torch.tensor(0., device=self.device)
+                    mse_loss_dirs = torch.tensor(0., device=self.device)
+                    # connection_loss = torch.tensor(0., device=self.device)
+                    # --- Vertex Branch (Connection Loss 核心) ---
+                    vtx_pred_coords = res_dict['vertex']['coords_4d']  # [N, 4]
+                    vtx_pred_feats  = res_dict['vertex']['feats']      # [N, C]
+                    # 1.1 排序 (既用于匹配 GT，也用于快速寻找空间邻居)
+                    vtx_pred_keys = flatten_coords_4d(vtx_pred_coords)
+                    vtx_pred_keys_sorted, vtx_pred_order = torch.sort(vtx_pred_keys)
+                    # 1.2 匹配 GT
+                    vtx_gt_keys = flatten_coords_4d(gt_vertex_voxels_1024.to(self.device))
+                    vtx_pos = torch.searchsorted(vtx_pred_keys_sorted, vtx_gt_keys)
+                    vtx_pos = vtx_pos.clamp(max=len(vtx_pred_keys_sorted) - 1)
+                    vtx_match_mask = (vtx_pred_keys_sorted[vtx_pos] == vtx_gt_keys)
+                    gt_to_pred_mapping = torch.full((len(vtx_gt_keys),), -1, device=self.device, dtype=torch.long)
+                    matched_pred_indices = vtx_pred_order[vtx_pos[vtx_match_mask]]
+                    gt_to_pred_mapping[vtx_match_mask] = matched_pred_indices
+                    # ====================================================
+                    # 2. 构建核心数据：正样本 Hash 集合
+                    # ====================================================
+                    # 这里的 pos_u/pos_v 仅用于构建 "什么是真连接" 的查询表
+                    u_gt, v_gt = gt_edges[:, 0], gt_edges[:, 1]
+                    u_pred = gt_to_pred_mapping[u_gt]
+                    v_pred = gt_to_pred_mapping[v_gt]
+                    valid_edge_mask = (u_pred != -1) & (v_pred != -1)
+                    real_pos_u = u_pred[valid_edge_mask]
+                    real_pos_v = v_pred[valid_edge_mask]
+                    num_real_pos = real_pos_u.shape[0]
+                    num_total_nodes = vtx_pred_coords.shape[0]
+                    if num_real_pos > 0:
+                        # 2. 构建候选样本 (Candidate Generation)
+                        # ====================================================
+                        cand_u_list = []
+                        cand_v_list = []
+                        batch_ids = vtx_pred_coords[:, 0]
+                        unique_batches = torch.unique(batch_ids)
+                        RADIUS = 64
+                        MAX_PTS_FOR_DIST = 12000
+                        K_RANDOM = 32
+                        for b_id in unique_batches:
+                            mask_b = (batch_ids == b_id)
+                            indices_b = torch.nonzero(mask_b).squeeze(-1) # Global indices
+                            coords_b = vtx_pred_coords[mask_b, 1:4].float() # (x,y,z)
+                            num_b = coords_b.shape[0]
+                            if num_b < 2: continue
+                            # --- A. Radius Graph (Hard Negatives) ---
+                            if num_b <= MAX_PTS_FOR_DIST:
+                                # 计算距离矩阵 [M, M]
+                                # NOTE: under autocast a float16 cdist may not be precise enough, so cast to float32 first
+                                dist_mat = torch.cdist(coords_b.float(), coords_b.float())
+                                # 找到距离小于 Radius 的点对 (排除自环)
+                                adj_mat = (dist_mat < RADIUS) & (dist_mat > 1e-6)
+                                # 提取索引 (local indices in batch)
+                                src_local, dst_local = torch.nonzero(adj_mat, as_tuple=True)
+                                # 映射回全局索引
+                                cand_u_list.append(indices_b[src_local])
+                                cand_v_list.append(indices_b[dst_local])
+                            else:
+                                print('num_b is big!')
+                                pass
+                            # --- B. Random Sampling (Easy Negatives) ---
+                            # 随机生成 num_b * K 对
+                            n_rand = num_b * K_RANDOM
+                            rand_src_local = torch.randint(0, num_b, (n_rand,), device=self.device)
+                            rand_dst_local = torch.randint(0, num_b, (n_rand,), device=self.device)
+                            # 映射回全局索引
+                            cand_u_list.append(indices_b[rand_src_local])
+                            cand_v_list.append(indices_b[rand_dst_local])
+                        # 合并所有来源 (GT + Radius + Random)
+                        # 注意：我们把 real_pos 也加进来，确保正样本一定在列表里
+                        all_u = torch.cat([real_pos_u] + cand_u_list)
+                        all_v = torch.cat([real_pos_v] + cand_v_list)
+                        # 3. 去重与 Labeling (Deduplication & Labeling)
+                        # ====================================================
+                        # 构造无向边 Hash: min * N + max
+                        # 确保 MAX_NODES 足够大，比如 1000000 或 num_total_nodes
+                        HASH_BASE = num_total_nodes + 100
+                        p_min = torch.min(all_u, all_v)
+                        p_max = torch.max(all_u, all_v)
+                        # 过滤掉自环 (u==v)
+                        valid_pair = (p_min != p_max)
+                        p_min = p_min[valid_pair]
+                        p_max = p_max[valid_pair]
+                        all_hashes = p_min.long() * HASH_BASE + p_max.long()
+                        # --- 核心：去重 ---
+                        unique_hashes = torch.unique(all_hashes)
+                        # 解码回 u, v
+                        final_u = unique_hashes // HASH_BASE
+                        final_v = unique_hashes % HASH_BASE
+                        # --- Labeling ---
+                        # 构建 GT 的 Hash 表用于查询
+                        gt_min = torch.min(real_pos_u, real_pos_v)
+                        gt_max = torch.max(real_pos_u, real_pos_v)
+                        gt_hashes = gt_min.long() * HASH_BASE + gt_max.long()
+                        gt_hashes = torch.unique(gt_hashes) # GT 也去重一下保险
+                        gt_hashes_sorted, _ = torch.sort(gt_hashes)
+                        # 查询 unique_hashes 是否在 gt_hashes 中
+                        # 使用 searchsorted
+                        idx_search = torch.searchsorted(gt_hashes_sorted, unique_hashes)
+                        idx_search = idx_search.clamp(max=len(gt_hashes_sorted) - 1)
+                        is_connected = (gt_hashes_sorted[idx_search] == unique_hashes)
+                        targets = is_connected.float().unsqueeze(-1) # [N_pairs, 1]
+                        # ============================================================
+                        # 4. 前向传播与 Loss
+                        # ====================================================
+                        feat_u = vtx_pred_feats[final_u]
+                        feat_v = vtx_pred_feats[final_v]
+                        # 对称特征融合
+                        feat_uv = torch.cat([feat_u, feat_v], dim=-1)
+                        feat_vu = torch.cat([feat_v, feat_u], dim=-1)
+                        logits_uv = self.connection_head(feat_uv)
+                        logits_vu = self.connection_head(feat_vu)
+                        logits = (logits_uv + logits_vu) / 2.
+                        # export_sampled_edges(
+                        #     coords=vtx_pred_coords,   # [N, 4]
+                        #     u=final_u,                # [E]
+                        #     v=final_v,                # [E]
+                        #     labels=targets,           # [E, 1]
+                        #     step_idx=b_idx,
+                        # )
+                        # export_sampled_edges(
+                        #     coords=vtx_pred_coords,   # [N, 4]
+                        #     u=final_u,                # [E]
+                        #     v=final_v,                # [E]
+                        #     labels=targets,           # [E, 1]
+                        #     step_idx=b_idx,
+                        #     batch_idx_to_viz=1,
+                        #     save_dir="debug_viz2"
+                        # )
+                        # exit()
+                        connection_loss = self.asyloss(logits, targets)
+                        total_loss += connection_loss
+                    else:
+                        connection_loss = torch.tensor(0., device=self.device)
+                # KL loss
+                kl_loss = posterior.kl(dims=(1,)).mean() * 1e-3 # 1e-3 before
+                total_loss += kl_loss
+        # Backpropagation
+        scaled_total_loss = total_loss / self.accum_steps
+        # self.scaler.scale(scaled_total_loss).backward()
+        scaled_total_loss.backward()
+        return {
+            'total_loss': total_loss.item(),
+            'kl_loss': kl_loss.item(),
+            'prune_loss': prune_loss_total.item(),
+            'vertex_loss': vertex_loss_total.item(),
+            'edge_loss': edge_loss_total.item(),
+            'offset_loss': mse_loss_feats.item(),
+            'direction_loss': mse_loss_dirs.item(),
+            'connection_loss': connection_loss.item(),
+        }
+    def train(self):
+        accum_steps = self.accum_steps
+        for epoch in range(self.cfg['training']['start_epoch'], self.cfg['training']['max_epochs']):
+            self.dataloader.sampler.set_epoch(epoch)
+            # Initialize metrics
+            metrics = {
+                'total_loss': 0.0,
+                'kl_loss': 0.0,
+                'prune_loss': 0.0,
+                'vertex_loss': 0.0,
+                'edge_loss': 0.0,
+                'offset_loss': 0.0,
+                'direction_loss': 0.0,
+                'connection_loss': 0.0,
+            }
+            num_batches = 0
+            self.optimizer.zero_grad(set_to_none=True)
+            for i, batch in enumerate(self.dataloader):
+                # Get all losses from train_step
+                if batch is None:
+                    continue
+                step_losses = self.train_step(batch, i)
+                # Accumulate losses
+                for key in metrics:
+                    metrics[key] += step_losses[key]
+                num_batches += 1
+                if (i + 1) % accum_steps == 0:
+                    # self.scaler.unscale_(self.optimizer)
+                    # torch.nn.utils.clip_grad_norm_(self.vae.parameters(), max_norm=0.5)
+                    # torch.nn.utils.clip_grad_norm_(self.voxel_encoder.parameters(), max_norm=0.5)
+                    # torch.nn.utils.clip_grad_norm_(self.connection_head.parameters(), max_norm=0.5)
+                    # self.scaler.step(self.optimizer)
+                    # self.scaler.update()
+                    # self.optimizer.zero_grad(set_to_none=True)
+                    # self.scheduler.step()
+                    torch.nn.utils.clip_grad_norm_(self.vae.parameters(), max_norm=0.5)
+                    torch.nn.utils.clip_grad_norm_(self.voxel_encoder.parameters(), max_norm=0.5)
+                    torch.nn.utils.clip_grad_norm_(self.connection_head.parameters(), max_norm=0.5)
+                    self.optimizer.step()
+                    self.optimizer.zero_grad(set_to_none=True)
+                    self.scheduler.step()
+                # Print batch-level metrics
+                if self.is_master:
+                    avg_metric = {key: value / num_batches for key, value in metrics.items()}
+                    print(
+                        f"[Epoch {epoch}] Batch:{num_batches} "
+                        f"AvgL:{avg_metric['total_loss']:.4f} "
+                        f"Loss: {step_losses['total_loss']:.4f}, "
+                        f"KLL: {step_losses['kl_loss']:.4f}, "
+                        f"PruneL: {step_losses['prune_loss']:.4f}, "
+                        f"VertexL: {step_losses['vertex_loss']:.4f}, "
+                        f"EdgeL: {step_losses['edge_loss']:.4f}, "
+                        f"OffsetL: {step_losses['offset_loss']:.4f}, "
+                        f"DireL: {step_losses['direction_loss']:.4f}, "
+                        f"ConL: {step_losses['connection_loss']:.4f}, "
+                        f"LR: {self.optimizer.param_groups[0]['lr']:.4e} "
+                    )
+                    if i % 2000 == 0 and i != 0:
+                        self.save_checkpoint(epoch, avg_metric['total_loss'], i)
+                        with open(self.log_file, "a") as f:
+                            current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+                            log_line = (
+                                f"Epoch {epoch:05d} | "
+                                f"Batch {i:05d} | "
+                                f"Loss: {avg_metric['total_loss']:.6f} "
+                                f"Avg KLL: {avg_metric['kl_loss']:.4f} "
+                                f"Avg PruneL: {avg_metric['prune_loss']:.4f} "
+                                f"Avg VertexL: {avg_metric['vertex_loss']:.4f} "
+                                f"Avg EdgeL: {avg_metric['edge_loss']:.4f} "
+                                f"Avg OffsetL: {avg_metric['offset_loss']:.4f} "
+                                f"Avg DireL: {avg_metric['direction_loss']:.4f} "
+                                f"Avg ConL: {avg_metric['connection_loss']:.4f} "
+                                f"LR: {self.optimizer.param_groups[0]['lr']:.4e} "
+                                f"[{current_time}]\n"
+                            )
+                            f.write(log_line)
+            if num_batches % accum_steps != 0:
+                # self.scaler.unscale_(self.optimizer)
+                # torch.nn.utils.clip_grad_norm_(self.vae.parameters(), max_norm=0.5)
+                # torch.nn.utils.clip_grad_norm_(self.voxel_encoder.parameters(), max_norm=0.5)
+                # torch.nn.utils.clip_grad_norm_(self.connection_head.parameters(), max_norm=0.5)
+                # self.scaler.step(self.optimizer)
+                # self.scaler.update()
+                # self.optimizer.zero_grad(set_to_none=True)
+                # self.scheduler.step()
+                torch.nn.utils.clip_grad_norm_(self.vae.parameters(), max_norm=0.5)
+                torch.nn.utils.clip_grad_norm_(self.voxel_encoder.parameters(), max_norm=0.5)
+                torch.nn.utils.clip_grad_norm_(self.connection_head.parameters(), max_norm=0.5)
+                self.optimizer.step()
+                self.optimizer.zero_grad(set_to_none=True)
+                self.scheduler.step()
+            # Calculate epoch averages
+            avg_metrics = {key: value / num_batches for key, value in metrics.items()}
+            self.train_loss_history.append(avg_metrics['total_loss'])
+            # Log to file
+            if self.is_master:
+                with open(self.log_file, "a") as f:
+                    current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+                    log_line = (
+                        f"Epoch {epoch:05d} | "
+                        f"Loss: {avg_metrics['total_loss']:.6f} "
+                        f"Avg KLL: {avg_metrics['kl_loss']:.4f} "
+                        f"Avg PruneL: {avg_metrics['prune_loss']:.4f} "
+                        f"Avg VertexL: {avg_metrics['vertex_loss']:.4f} "
+                        f"Avg EdgeL: {avg_metrics['edge_loss']:.4f} "
+                        f"Avg OffsetL: {avg_metrics['offset_loss']:.4f} "
+                        f"Avg DireL: {avg_metrics['direction_loss']:.4f} "
+                        f"Avg ConL: {avg_metrics['connection_loss']:.4f} "
+                        f"LR: {self.optimizer.param_groups[0]['lr']:.4e} "
+                        f"[{current_time}]\n"
+                    )
+                    f.write(log_line)
+                # Print epoch summary
+                print(
+                    f"[Epoch {epoch}] "
+                    f"Avg Loss: {avg_metrics['total_loss']:.4f} "
+                    f"Avg KLL: {avg_metrics['kl_loss']:.4f} "
+                    f"Avg PruneL: {avg_metrics['prune_loss']:.4f} "
+                    f"Avg VertexL: {avg_metrics['vertex_loss']:.4f} "
+                    f"Avg EdgeL: {avg_metrics['edge_loss']:.4f} "
+                    f"Avg OffsetL: {avg_metrics['offset_loss']:.4f} "
+                    f"Avg DireL: {avg_metrics['direction_loss']:.4f} "
+                    f"Avg ConL: {avg_metrics['connection_loss']:.4f} "
+                    f"[{current_time}]\n"
+                )
+                # Save checkpoint
+                if epoch % self.cfg['training']['save_every'] == 0:
+                    self.save_checkpoint(epoch, avg_metrics['total_loss'], i)
+            # Update learning rate
+            if self.is_master:
+                current_lr = self.optimizer.param_groups[0]['lr']
+                print(f"Epoch {epoch}: Learning rate updated to {current_lr:.2e}")
+            dist.barrier()
+def main():
+    # Initialize the process group
+    dist.init_process_group(backend='nccl')
+    # Get rank and world size from environment variables set by the launcher
+    rank = int(os.environ['RANK'])
+    world_size = int(os.environ['WORLD_SIZE'])
+    local_rank = int(os.environ['LOCAL_RANK'])
+    # Set the device for the current process. This is crucial.
+    torch.cuda.set_device(local_rank)
+    torch.manual_seed(42)
+    # with torch.cuda.amp.autocast(dtype=torch.bfloat16):
+    # Pass the distributed info to the Trainer
+    trainer = Trainer(
+        config_path="/root/Trisf/config_edge_1024_error_8enc_8dec_woself_finetune_128to1024.yaml",
+        rank=rank,
+        world_size=world_size,
+        local_rank=local_rank
+    )
+    trainer.train()
+    # Clean up the process group
+    dist.destroy_process_group()
+if __name__ == '__main__':
+    main()

train_slat_vae_512_128to256_pointnet_head.py ADDED Viewed

	@@ -0,0 +1,917 @@

+import torch.nn as nn
+import os
+# os.environ['ATTN_BACKEND'] = 'xformers'
+import yaml
+import torch
+import time
+from datetime import datetime
+from torch.utils.data import DataLoader
+from functools import partial
+from triposf.modules.sparse.basic import SparseTensor
+import torch.nn.functional as F
+from torch.optim import AdamW
+from torch.cuda.amp import GradScaler, autocast
+from triposf.models.triposf_vae.VoxelFeatureVAE_edge_woself_128to1024_decoder_head import VoxelVAE
+from vertex_encoder import VoxelFeatureEncoder_active_pointnet, ConnectionHead
+from dataset_triposf_head import VoxelVertexDataset_edge, collate_fn_pointnet
+from utils import load_pretrained_woself, AdaptiveFocalLoss, fast_isin, AsymmetricFocalLoss, DiceLoss
+import torch.distributed as dist
+from torch.nn.parallel import DistributedDataParallel as DDP
+from torch.utils.data.distributed import DistributedSampler
+from transformers import get_cosine_schedule_with_warmup
+import math
+import numpy as np
+import open3d as o3d
+def export_sampled_edges(coords, u, v, labels, step_idx, save_dir="debug_viz", batch_idx_to_viz=0):
+    """
+    导出采样边为 PLY 文件。
+    Args:
+        coords: [N, 4] Tensor (batch_idx, x, y, z)
+        u: [E] Tensor, 起点索引 (global index)
+        v: [E] Tensor, 终点索引 (global index)
+        labels: [E, 1] Tensor, 1.0 为正样本, 0.0 为负样本
+        step_idx: 当前步数或 epoch，用于文件名
+        save_dir: 保存目录
+        batch_idx_to_viz: 只可视化哪个 batch 的数据 (防止多个 batch 叠加在一起看不清)
+    """
+    os.makedirs(save_dir, exist_ok=True)
+    # 1. 转为 CPU numpy
+    coords_np = coords.detach().cpu().numpy()
+    u_np = u.detach().cpu().numpy()
+    v_np = v.detach().cpu().numpy()
+    labels_np = labels.detach().cpu().numpy().reshape(-1)
+    # 2. 筛选特定 Batch (通常只看 Batch 0 比较清晰)
+    # coords 的第0列是 batch index
+    batch_mask = (coords_np[:, 0] == batch_idx_to_viz)
+    # 获取属于该 batch 的全局索引范围
+    # 注意：u 和 v 是针对所有 coords 的全局索引。
+    # 我们需要判断一条边的两个端点是否都在这个 batch 内。
+    # 快速检查端点是否在当前 batch
+    valid_u_in_batch = batch_mask[u_np]
+    valid_v_in_batch = batch_mask[v_np]
+    edge_batch_mask = valid_u_in_batch & valid_v_in_batch
+    if edge_batch_mask.sum() == 0:
+        print(f"Warning: No edges found for batch {batch_idx_to_viz}")
+        return
+    # 应用 Batch 筛选
+    u_b = u_np[edge_batch_mask]
+    v_b = v_np[edge_batch_mask]
+    labels_b = labels_np[edge_batch_mask]
+    # 3. 提取该 Batch 的顶点坐标 (去掉 batch_idx 维度)
+    # 此时我们需要重新映射 u, v 的索引，因为我们要只保存该 batch 的点
+    batch_indices_global = np.where(batch_mask)[0]
+    # 创建全局索引到局部索引的映射表
+    global_to_local = {gid: lid for lid, gid in enumerate(batch_indices_global)}
+    points_xyz = coords_np[batch_indices_global, 1:4] # [M, 3]
+    # 转换 u, v 为局部索引
+    try:
+        u_local = np.array([global_to_local[idx] for idx in u_b])
+        v_local = np.array([global_to_local[idx] for idx in v_b])
+    except KeyError:
+        print("Error in index mapping. Edge endpoints might cross batches.")
+        return
+    # 4. 分离正负样本
+    pos_mask = labels_b > 0.5
+    neg_mask = ~pos_mask
+    # 内部函数：写 PLY
+    def write_ply(filename, points, edges_u, edges_v, color_rgb):
+        num_verts = len(points)
+        num_edges = len(edges_u)
+        with open(filename, 'w') as f:
+            f.write("ply\n")
+            f.write("format ascii 1.0\n")
+            f.write(f"element vertex {num_verts}\n")
+            f.write("property float x\n")
+            f.write("property float y\n")
+            f.write("property float z\n")
+            f.write("property uchar red\n")
+            f.write("property uchar green\n")
+            f.write("property uchar blue\n")
+            f.write(f"element edge {num_edges}\n")
+            f.write("property int vertex1\n")
+            f.write("property int vertex2\n")
+            f.write("end_header\n")
+            # Write Vertices with Color
+            # 为了让可视化更清楚，我们将所有点染成指定颜色
+            for i in range(num_verts):
+                x, y, z = points[i]
+                f.write(f"{x:.4f} {y:.4f} {z:.4f} {color_rgb[0]} {color_rgb[1]} {color_rgb[2]}\n")
+            # Write Edges
+            for i in range(num_edges):
+                f.write(f"{edges_u[i]} {edges_v[i]}\n")
+        print(f"Saved: {filename} (Edges: {num_edges})")
+    # 5. 保存正样本 (绿色)
+    if pos_mask.sum() > 0:
+        write_ply(
+            os.path.join(save_dir, f"step_{step_idx}_pos_edges.ply"),
+            points_xyz, # 使用所有点，或者优化为只使用涉及的点（这里为了坐标统一���单起见使用所有点）
+            u_local[pos_mask],
+            v_local[pos_mask],
+            color_rgb=(0, 255, 0) # Green
+        )
+    # 6. 保存负样本 (红色)
+    if neg_mask.sum() > 0:
+        # 为了避免文件太大或太乱，如果负样本特别多，可以考虑随机采样一部分保存
+        # 这里默认全部保存
+        write_ply(
+            os.path.join(save_dir, f"step_{step_idx}_neg_edges.ply"),
+            points_xyz,
+            u_local[neg_mask],
+            v_local[neg_mask],
+            color_rgb=(255, 0, 0) # Red
+        )
+def flatten_coords_4d(coords_4d: torch.Tensor):
+    coords_4d_long = coords_4d.long()
+    base_x = 256
+    base_y = 256 * 256
+    base_z = 256 * 256 * 256
+    flat_coords = coords_4d_long[:, 0] * base_z + \
+                  coords_4d_long[:, 1] * base_y + \
+                  coords_4d_long[:, 2] * base_x + \
+                  coords_4d_long[:, 3]
+    return flat_coords
+def downsample_voxels(
+    voxels: torch.Tensor,
+    input_resolution: int,
+    output_resolution: int
+) -> torch.Tensor:
+    if input_resolution % output_resolution != 0:
+        raise ValueError(f"input_resolution ({input_resolution}) must be divisible "
+                         f"by output_resolution ({output_resolution}).")
+    factor = input_resolution // output_resolution
+    downsampled_voxels = voxels.clone().to(torch.long)
+    downsampled_voxels[:, 1:] //= factor
+    unique_downsampled_voxels = torch.unique(downsampled_voxels, dim=0)
+    return unique_downsampled_voxels
+class Trainer:
+    def __init__(self, config_path, rank, world_size, local_rank):
+        self.rank = rank
+        self.world_size = world_size
+        self.local_rank = local_rank
+        self.is_master = self.rank == 0
+        self.load_config(config_path)
+        self.accum_steps = max(1, 4 // self.cfg['training']['batch_size'])
+        self.config_hash = self.save_config_with_hash()
+        self.init_device()
+        self.init_dirs()
+        self.init_components()
+        self.init_training()
+        self.train_loss_history = []
+        self.eval_loss_history = []
+        self.best_eval_loss = float('inf')
+    def save_config_with_hash(self):
+        import hashlib
+        # Serialize config to hash
+        config_str = yaml.dump(self.cfg)
+        config_hash = hashlib.md5(config_str.encode()).hexdigest()[:8]
+        # Prepare all flags as string for formatting
+        add_block_embed_flag = "True" if self.cfg['model']['add_block_embed'] else "False"
+        using_attn_flag = "True" if self.cfg['model']['using_attn'] else "False"
+        dataset_name = os.path.basename(self.cfg['dataset']['path'])
+        # Format save_dir with all placeholders
+        self.cfg['experiment']['save_dir'] = self.cfg['experiment']['save_dir'].format(
+            dataset_name=dataset_name,
+            config_hash=config_hash,
+            n_train_samples=self.cfg['dataset']['n_train_samples'],
+            multires=self.cfg['model']['multires'],
+            add_block_embed=add_block_embed_flag,
+            using_attn=using_attn_flag,
+            batch_size=self.cfg['training']['batch_size'],
+        )
+        if self.is_master:
+            os.makedirs(self.save_dir, exist_ok=True)
+            config_path = os.path.join(self.save_dir, "config.yaml")
+            with open(config_path, 'w') as f:
+                yaml.dump(self.cfg, f)
+        dist.barrier()
+        return config_hash
+    def save_checkpoint(self, epoch, avg_loss, batch_idx):
+        if not self.is_master:
+            return
+        checkpoint_path = os.path.join(self.save_dir, f"checkpoint_epoch{epoch}_batch{batch_idx}_loss{avg_loss:.4f}.pt")
+        config_path = os.path.join(self.save_dir, "config.yaml")
+        torch.save({
+            'voxel_encoder': self.voxel_encoder.module.state_dict(),
+            'vae': self.vae.module.state_dict(),
+            'connection_head': self.connection_head.module.state_dict(),
+            'epoch': epoch,
+            'loss': avg_loss,
+            'config': self.cfg
+        }, checkpoint_path)
+        def quoted_presenter(dumper, data):
+            return dumper.represent_scalar('tag:yaml.org,2002:str', data, style='"')
+        yaml.add_representer(str, quoted_presenter)
+        with open(config_path, 'w') as f:
+            yaml.dump(self.cfg, f)
+    def load_config(self, config_path):
+        with open(config_path) as f:
+            self.cfg = yaml.safe_load(f)
+        # Extract and convert flags for formatting
+        add_block_embed_flag = "True" if self.cfg['model']['add_block_embed'] else "False"
+        using_attn_flag = "True" if self.cfg['model']['using_attn'] else "False"
+        dataset_name = os.path.basename(self.cfg['dataset']['path'])
+        self.save_dir = self.cfg['experiment']['save_dir'].format(
+            dataset_name=dataset_name,
+            n_train_samples=self.cfg['dataset']['n_train_samples'],
+            multires=self.cfg['model']['multires'],
+            add_block_embed=add_block_embed_flag,
+            using_attn=using_attn_flag,
+            batch_size=self.cfg['training']['batch_size'],
+        )
+        if self.is_master:
+            os.makedirs(self.save_dir, exist_ok=True)
+        dist.barrier()
+    def init_device(self):
+        self.device = torch.device(f"cuda:{self.local_rank}")
+    def init_dirs(self):
+        self.log_file = os.path.join(self.save_dir, f"training_log_{self.cfg['training']['lr']}.txt")
+        if self.is_master:
+            with open(self.log_file, "a") as f:
+                current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+                f.write(f"[{current_time}] Config loaded for distributed training with world size {self.world_size}\n")
+    def init_components(self):
+        self.dataset = VoxelVertexDataset_edge(
+            root_dir=self.cfg['dataset']['path'],
+            base_resolution=self.cfg['dataset']['base_resolution'],
+            min_resolution=self.cfg['dataset']['min_resolution'],
+            cache_dir=self.cfg['dataset']['cache_dir'],
+            renders_dir=self.cfg['dataset']['renders_dir'],
+            filter_active_voxels=self.cfg['dataset']['filter_active_voxels'],
+            cache_filter_path=self.cfg['dataset']['cache_filter_path'],
+            active_voxel_res=128,
+            pc_sample_number=819200,
+            sample_type=self.cfg['dataset']['sample_type'],
+        )
+        self.sampler = DistributedSampler(
+            self.dataset,
+            num_replicas=self.world_size,
+            rank=self.rank,
+            shuffle=True,
+        )
+        self.dataloader = DataLoader(
+            self.dataset,
+            batch_size=self.cfg['training']['batch_size'],
+            shuffle=False,
+            collate_fn=partial(collate_fn_pointnet,),
+            num_workers=self.cfg['training']['num_workers'],
+            pin_memory=True,
+            sampler=self.sampler,
+            # prefetch_factor=4,
+            persistent_workers=True,
+        )
+        self.voxel_encoder = VoxelFeatureEncoder_active_pointnet(
+            in_channels=15,
+            hidden_dim=256,
+            out_channels=1024,
+            scatter_type='mean',
+            n_blocks=5,
+            resolution=128,
+            add_label=False,
+        ).to(self.device)
+        self.connection_head = ConnectionHead(
+            channels=128 * 2,
+            out_channels=1,
+            mlp_ratio=4,
+        ).to(self.device)
+        # ablation 3: voxelvae_1volume, have tested
+        self.vae = VoxelVAE(
+            in_channels=self.cfg['model']['in_channels'],
+            latent_dim=self.cfg['model']['latent_dim'],
+            encoder_blocks=self.cfg['model']['encoder_blocks'],
+            decoder_blocks_vtx=self.cfg['model']['decoder_blocks_vtx'],
+            decoder_blocks_edge=self.cfg['model']['decoder_blocks_edge'],
+            num_heads=8,
+            num_head_channels=64,
+            mlp_ratio=4.0,
+            attn_mode="swin",
+            window_size=8,
+            pe_mode="ape",
+            use_fp16=False,
+            use_checkpoint=True,
+            qk_rms_norm=False,
+            using_subdivide=True,
+            using_attn=self.cfg['model']['using_attn'],
+            attn_first=self.cfg['model'].get('attn_first', True),
+            pred_direction=self.cfg['model'].get('pred_direction', False),
+        ).to(self.device)
+        if self.cfg['training']['from_pretrained']:
+            load_pretrained_woself(
+                checkpoint_path=self.cfg['training']['checkpoint_path'],
+                voxel_encoder=self.voxel_encoder,
+                vae=self.vae,
+                connection_head=self.connection_head,
+                optimizer=None,
+            )
+        self.voxel_encoder = DDP(self.voxel_encoder, device_ids=[self.local_rank], find_unused_parameters=False)
+        self.connection_head = DDP(self.connection_head, device_ids=[self.local_rank], find_unused_parameters=False)
+        self.vae = DDP(self.vae, device_ids=[self.local_rank], find_unused_parameters=False)
+    def init_training(self):
+        self.optimizer = AdamW(
+            list(self.vae.module.parameters()) +
+            list(self.voxel_encoder.module.parameters()) +
+            list(self.connection_head.module.parameters()),
+            lr=self.cfg['training']['lr'],
+            weight_decay=0.01,
+        )
+        num_update_steps_per_epoch = math.ceil(len(self.dataloader) / self.accum_steps)
+        max_epochs = self.cfg['training']['max_epochs']
+        num_training_steps = max_epochs * num_update_steps_per_epoch
+        num_warmup_steps = 100
+        self.scheduler = get_cosine_schedule_with_warmup(
+            self.optimizer,
+            num_warmup_steps=num_warmup_steps,
+            num_training_steps=num_training_steps
+        )
+        self.focal_loss = AdaptiveFocalLoss(gamma=2.0, max_alpha=100.0).to(self.device)
+        self.mse_loss = nn.MSELoss(reduction='mean').to(self.device)
+        self.asyloss = AsymmetricFocalLoss(
+            gamma_pos=0.0,
+            gamma_neg=4.0,
+            clip=0.05,
+        )
+        self.bce_loss = torch.nn.BCEWithLogitsLoss()
+        self.dice_loss = DiceLoss()
+        self.scaler = GradScaler()
+    def train_step(self, batch):
+        """Modified training step that handles vertex and edge voxels separately after initial prediction."""
+        # 1. Retrieve data from batch
+        combined_voxels_256 = batch['combined_voxels_256'].to(self.device)
+        combined_voxel_labels_256 = batch['combined_voxel_labels_256'].to(self.device)
+        gt_vertex_voxels_256 = batch['gt_vertex_voxels_256'].to(self.device)
+        gt_edge_voxels_256 = batch['gt_edge_voxels_256'].to(self.device)
+        gt_combined_endpoints_256 = batch['gt_combined_endpoints_256'].to(self.device)
+        gt_combined_errors_256 = batch['gt_combined_errors_256'].to(self.device)
+        gt_edges = batch['gt_vertex_edge_indices_256'].to(self.device)
+        edge_mask = (combined_voxel_labels_256 == 1)
+        gt_edge_endpoints_256 = gt_combined_endpoints_256[edge_mask]
+        gt_edge_errors_256 = gt_combined_errors_256[edge_mask]
+        p1 = gt_edge_endpoints_256[:, 1:4].float()
+        p2 = gt_edge_endpoints_256[:, 4:7].float()
+        mask = ( (p1[:,0] < p2[:,0]) |
+                ((p1[:,0] == p2[:,0]) & (p1[:,1] < p2[:,1])) |
+                ((p1[:,0] == p2[:,0]) & (p1[:,1] == p2[:,1]) & (p1[:,2] <= p2[:,2])) )
+        pA = torch.where(mask[:, None], p1, p2)  # smaller one
+        pB = torch.where(mask[:, None], p2, p1)  # larger one
+        d = pB - pA
+        dir_gt = F.normalize(d, dim=-1, eps=1e-6)
+        vtx_128 = downsample_voxels(gt_vertex_voxels_256, input_resolution=256, output_resolution=128)
+        vtx_256 = gt_vertex_voxels_256
+        edge_128 = downsample_voxels(combined_voxels_256, input_resolution=256, output_resolution=128)
+        edge_256 = combined_voxels_256
+        active_coords = batch['active_voxels_128'].to(self.device)
+        point_cloud = batch['point_cloud_128'].to(self.device)
+        with torch.cuda.amp.autocast(dtype=torch.bfloat16):
+            active_voxel_feats = self.voxel_encoder(
+                p=point_cloud,
+                sparse_coords=active_coords,
+                res=128,
+                bbox_size=(-0.5, 0.5),
+            )
+            sparse_input = SparseTensor(
+                feats=active_voxel_feats,
+                coords=active_coords.int()
+            )
+            gt_edge_voxels_list = [
+                edge_128,
+                edge_256,
+            ]
+            gt_vertex_voxels_list = [
+                vtx_128,
+                vtx_256,
+            ]
+            results, posterior, latent_128 = self.vae(
+                sparse_input,
+                gt_vertex_voxels_list=gt_vertex_voxels_list,
+                gt_edge_voxels_list=gt_edge_voxels_list,
+                training=True,
+                sample_ratio=0.,
+            )
+            # print("results[-1]['edge']['coords_4d'][1827:1830]", results[-1]['edge']['coords_4d'][1827:1830])
+            total_loss = 0.
+            prune_loss_total = 0.
+            vertex_loss_total = 0.
+            edge_loss_total=0.
+            with autocast(dtype=torch.bfloat16):
+                initial_result = results[0]
+                vertex_mask = initial_result['vertex_mask']
+                vtx_logits = initial_result['vtx_feats']
+                vertex_loss = self.asyloss(vtx_logits.squeeze(-1), vertex_mask.float())
+                edge_mask = initial_result['edge_mask']
+                edge_logits = initial_result['edge_feats']
+                edge_loss = self.asyloss(edge_logits.squeeze(-1), edge_mask.float())
+                vertex_loss_total += vertex_loss
+                edge_loss_total += edge_loss
+                total_loss += vertex_loss
+                total_loss += edge_loss
+                # Process each level's results
+                for idx, res_dict in enumerate(results[1:], start=1):
+                    # Vertex branch losses
+                    vertex_pred_coords = res_dict['vertex']['occ_coords']
+                    vertex_occ_probs = res_dict['vertex']['occ_probs']
+                    vertex_gt_coords = res_dict['vertex']['coords']
+                    vertex_labels = fast_isin(vertex_pred_coords, vertex_gt_coords, resolution=256).float()
+                    # print('vertex_labels.sum()', vertex_labels.sum(), idx)
+                    vertex_logits = vertex_occ_probs.squeeze()
+                    # if vertex_labels.sum() > 0 and vertex_labels.sum() < len(vertex_labels):
+                    vertex_prune_loss = self.focal_loss(vertex_logits, vertex_labels)
+                    # vertex_prune_loss = self.dice_loss(vertex_logits, vertex_labels)
+                    # dilation 1: bce loss
+                    # vertex_prune_loss = self.bce_loss(vertex_logits, vertex_labels,)
+                    prune_loss_total += vertex_prune_loss
+                    total_loss += vertex_prune_loss
+                    # Edge branch losses
+                    edge_pred_coords = res_dict['edge']['occ_coords']
+                    edge_occ_probs = res_dict['edge']['occ_probs']
+                    edge_gt_coords = res_dict['edge']['coords']
+                    edge_labels = fast_isin(edge_pred_coords, edge_gt_coords, resolution=256).float()
+                    # print('edge_labels.sum()', edge_labels.sum(), idx)
+                    edge_logits = edge_occ_probs.squeeze()
+                    # if edge_labels.sum() > 0 and edge_labels.sum() < len(edge_labels):
+                    edge_prune_loss = self.focal_loss(edge_logits, edge_labels)
+                    # dilation 1: bce loss
+                    # edge_prune_loss = self.bce_loss(edge_logits, edge_labels,)
+                    prune_loss_total += edge_prune_loss
+                    total_loss += edge_prune_loss
+                    if idx == 1:
+                        pred_coords = res_dict['edge']['coords_4d']              # [N,4] (b,x,y,z)
+                        pred_feats  = res_dict['edge']['predicted_offset_feats'] # [N,C]
+                        gt_coords = gt_edge_voxels_256.to(pred_coords.device)    # [M,4]
+                        gt_feats  = gt_edge_errors_256[:, 1:].to(pred_coords.device)  # [M,C]
+                        pred_keys = flatten_coords_4d(pred_coords)
+                        gt_keys   = flatten_coords_4d(gt_coords)
+                        sorted_pred_keys, pred_order = torch.sort(pred_keys)
+                        pred_coords_sorted = pred_coords[pred_order]
+                        pred_feats_sorted  = pred_feats[pred_order]
+                        sorted_gt_keys, gt_order = torch.sort(gt_keys)
+                        gt_coords_sorted = gt_coords[gt_order]
+                        gt_feats_sorted  = gt_feats[gt_order]
+                        pos = torch.searchsorted(sorted_gt_keys, sorted_pred_keys)
+                        valid_mask = (pos < len(sorted_gt_keys)) & (sorted_gt_keys[pos] == sorted_pred_keys)
+                        if valid_mask.any():
+                            # print('valid_mask.sum()', valid_mask.sum())
+                            matched_pred_feats = pred_feats_sorted[valid_mask]
+                            matched_gt_feats   = gt_feats_sorted[pos[valid_mask]]
+                            mse_loss_feats = self.mse_loss(matched_pred_feats, matched_gt_feats * 2)
+                            total_loss += mse_loss_feats * 0.
+                            if self.cfg['model'].get('pred_direction', False):
+                                pred_dirs = res_dict['edge']['predicted_direction_feats']
+                                dir_gt_device = dir_gt.to(pred_coords.device)
+                                pred_dirs_sorted = pred_dirs[pred_order]
+                                dir_gt_sorted = dir_gt_device[gt_order]
+                                matched_pred_dirs = pred_dirs_sorted[valid_mask]
+                                matched_gt_dirs   = dir_gt_sorted[pos[valid_mask]]
+                                mse_loss_dirs = self.mse_loss(matched_pred_dirs, matched_gt_dirs)
+                                total_loss += mse_loss_dirs * 0.
+                        else:
+                            mse_loss_feats = torch.tensor(0., device=pred_coords.device)
+                            if self.cfg['model'].get('pred_direction', False):
+                                mse_loss_dirs = torch.tensor(0., device=pred_coords.device)
+                        # --- Vertex Branch (Connection Loss 核心) ---
+                        vtx_pred_coords = res_dict['vertex']['coords_4d']  # [N, 4]
+                        vtx_pred_feats  = res_dict['vertex']['feats']      # [N, C]
+                        # 1.1 排序 (既用于匹配 GT，也用于快速寻找空间邻居)
+                        vtx_pred_keys = flatten_coords_4d(vtx_pred_coords)
+                        vtx_pred_keys_sorted, vtx_pred_order = torch.sort(vtx_pred_keys)
+                        # 1.2 匹配 GT
+                        vtx_gt_keys = flatten_coords_4d(gt_vertex_voxels_256.to(self.device))
+                        vtx_pos = torch.searchsorted(vtx_pred_keys_sorted, vtx_gt_keys)
+                        vtx_pos = vtx_pos.clamp(max=len(vtx_pred_keys_sorted) - 1)
+                        vtx_match_mask = (vtx_pred_keys_sorted[vtx_pos] == vtx_gt_keys)
+                        gt_to_pred_mapping = torch.full((len(vtx_gt_keys),), -1, device=self.device, dtype=torch.long)
+                        matched_pred_indices = vtx_pred_order[vtx_pos[vtx_match_mask]]
+                        gt_to_pred_mapping[vtx_match_mask] = matched_pred_indices
+                        # ====================================================
+                        # 2. 构建核心数据：正样本 Hash 集合
+                        # ====================================================
+                        # 这里的 pos_u/pos_v 仅用于构建 "什么是真连接" 的查询表
+                        u_gt, v_gt = gt_edges[:, 0], gt_edges[:, 1]
+                        u_pred = gt_to_pred_mapping[u_gt]
+                        v_pred = gt_to_pred_mapping[v_gt]
+                        valid_edge_mask = (u_pred != -1) & (v_pred != -1)
+                        real_pos_u = u_pred[valid_edge_mask]
+                        real_pos_v = v_pred[valid_edge_mask]
+                        num_real_pos = real_pos_u.shape[0]
+                        num_total_nodes = vtx_pred_coords.shape[0]
+                        if num_real_pos > 0:
+                            # 2. 构建候选样本 (Candidate Generation)
+                            # ====================================================
+                            cand_u_list = []
+                            cand_v_list = []
+                            batch_ids = vtx_pred_coords[:, 0]
+                            unique_batches = torch.unique(batch_ids)
+                            RADIUS = 16
+                            MAX_PTS_FOR_DIST = 12000
+                            K_RANDOM = 32
+                            for b_id in unique_batches:
+                                mask_b = (batch_ids == b_id)
+                                indices_b = torch.nonzero(mask_b).squeeze(-1) # Global indices
+                                coords_b = vtx_pred_coords[mask_b, 1:4].float() # (x,y,z)
+                                num_b = coords_b.shape[0]
+                                if num_b < 2: continue
+                                # --- A. Radius Graph (Hard Negatives) ---
+                                if num_b <= MAX_PTS_FOR_DIST:
+                                    # 计算距离矩阵 [M, M]
+                                    # 注意：autocast 下 float16 的 cdist 可能精度不够，建议转 float32
+                                    dist_mat = torch.cdist(coords_b.float(), coords_b.float())
+                                    # 找到距离小于 Radius 的点对 (排除自环)
+                                    adj_mat = (dist_mat < RADIUS) & (dist_mat > 1e-6)
+                                    # 提取索引 (local indices in batch)
+                                    src_local, dst_local = torch.nonzero(adj_mat, as_tuple=True)
+                                    # 映射回全局索引
+                                    cand_u_list.append(indices_b[src_local])
+                                    cand_v_list.append(indices_b[dst_local])
+                                else:
+                                    # 如果点太多，显存不够，退化为随机局部采样或跳过
+                                    # 这里简单处理：跳过 Radius Graph，依赖 Random
+                                    pass
+                                # --- B. Random Sampling (Easy Negatives) ---
+                                # 随机生成 num_b * K 对
+                                n_rand = num_b * K_RANDOM
+                                rand_src_local = torch.randint(0, num_b, (n_rand,), device=self.device)
+                                rand_dst_local = torch.randint(0, num_b, (n_rand,), device=self.device)
+                                # 映射回全局索引
+                                cand_u_list.append(indices_b[rand_src_local])
+                                cand_v_list.append(indices_b[rand_dst_local])
+                            # 合并所有来源 (GT + Radius + Random)
+                            # 注意：我们把 real_pos 也加进来，确保正样本一定在列表里
+                            all_u = torch.cat([real_pos_u] + cand_u_list)
+                            all_v = torch.cat([real_pos_v] + cand_v_list)
+                            # 3. 去重与 Labeling (Deduplication & Labeling)
+                            # ====================================================
+                            # 构造无向边 Hash: min * N + max
+                            # 确保 MAX_NODES 足够大，比如 1000000 或 num_total_nodes
+                            HASH_BASE = num_total_nodes + 100
+                            p_min = torch.min(all_u, all_v)
+                            p_max = torch.max(all_u, all_v)
+                            # 过滤掉自环 (u==v)
+                            valid_pair = (p_min != p_max)
+                            p_min = p_min[valid_pair]
+                            p_max = p_max[valid_pair]
+                            all_hashes = p_min.long() * HASH_BASE + p_max.long()
+                            # --- 核心：去重 ---
+                            unique_hashes = torch.unique(all_hashes)
+                            # 解码回 u, v
+                            final_u = unique_hashes // HASH_BASE
+                            final_v = unique_hashes % HASH_BASE
+                            # --- Labeling ---
+                            # 构建 GT 的 Hash 表用于查询
+                            gt_min = torch.min(real_pos_u, real_pos_v)
+                            gt_max = torch.max(real_pos_u, real_pos_v)
+                            gt_hashes = gt_min.long() * HASH_BASE + gt_max.long()
+                            gt_hashes = torch.unique(gt_hashes) # GT 也去重一下保险
+                            gt_hashes_sorted, _ = torch.sort(gt_hashes)
+                            # 查询 unique_hashes 是否在 gt_hashes 中
+                            # 使用 searchsorted
+                            idx_search = torch.searchsorted(gt_hashes_sorted, unique_hashes)
+                            idx_search = idx_search.clamp(max=len(gt_hashes_sorted) - 1)
+                            is_connected = (gt_hashes_sorted[idx_search] == unique_hashes)
+                            targets = is_connected.float().unsqueeze(-1) # [N_pairs, 1]
+                            # 4. 前向传播与 Loss
+                            # ====================================================
+                            feat_u = vtx_pred_feats[final_u]
+                            feat_v = vtx_pred_feats[final_v]
+                            # 对称特征融合
+                            feat_uv = torch.cat([feat_u, feat_v], dim=-1)
+                            feat_vu = torch.cat([feat_v, feat_u], dim=-1)
+                            logits_uv = self.connection_head(feat_uv)
+                            logits_vu = self.connection_head(feat_vu)
+                            logits = logits_uv + logits_vu
+                            # print('targets.sum()', targets.sum())
+                            # print('targets.shape', targets.shape)
+                            # export_sampled_edges(
+                            #     coords=vtx_pred_coords,   # [N, 4]
+                            #     u=final_u,                # [E]
+                            #     v=final_v,                # [E]
+                            #     labels=targets,           # [E, 1]
+                            #     step_idx=0,
+                            # )
+                            # exit()
+                            # Focal Loss
+                            connection_loss = self.asyloss(logits, targets)
+                            total_loss += connection_loss
+                        else:
+                            connection_loss = torch.tensor(0., device=self.device)
+                # KL loss
+                kl_loss = posterior.kl(dims=(1,)).mean() * 1e-3 # 1e-3 before
+                total_loss += kl_loss
+        # Backpropagation
+        scaled_total_loss = total_loss / self.accum_steps
+        self.scaler.scale(scaled_total_loss).backward()
+        return {
+            'total_loss': total_loss.item(),
+            'kl_loss': kl_loss.item(),
+            'prune_loss': prune_loss_total.item(),
+            'vertex_loss': vertex_loss_total.item(),
+            'edge_loss': edge_loss_total.item(),
+            'offset_loss': mse_loss_feats.item(),
+            'direction_loss': mse_loss_dirs.item(),
+            'connection_loss': connection_loss.item(),
+        }
+    def train(self):
+        accum_steps = self.accum_steps
+        for epoch in range(self.cfg['training']['start_epoch'], self.cfg['training']['max_epochs']):
+            self.dataloader.sampler.set_epoch(epoch)
+            # Initialize metrics
+            metrics = {
+                'total_loss': 0.0,
+                'kl_loss': 0.0,
+                'prune_loss': 0.0,
+                'vertex_loss': 0.0,
+                'edge_loss': 0.0,
+                'offset_loss': 0.0,
+                'direction_loss': 0.0,
+                'connection_loss': 0.0,
+            }
+            num_batches = 0
+            self.optimizer.zero_grad(set_to_none=True)
+            for i, batch in enumerate(self.dataloader):
+                # Get all losses from train_step
+                if batch is None:
+                    continue
+                step_losses = self.train_step(batch)
+                # Accumulate losses
+                for key in metrics:
+                    metrics[key] += step_losses[key]
+                num_batches += 1
+                if (i + 1) % accum_steps == 0:
+                    self.scaler.unscale_(self.optimizer)
+                    torch.nn.utils.clip_grad_norm_(self.vae.parameters(), max_norm=1.0)
+                    torch.nn.utils.clip_grad_norm_(self.voxel_encoder.parameters(), max_norm=1.0)
+                    torch.nn.utils.clip_grad_norm_(self.connection_head.parameters(), max_norm=1.0)
+                    self.scaler.step(self.optimizer)
+                    self.scaler.update()
+                    self.optimizer.zero_grad(set_to_none=True)
+                    self.scheduler.step()
+                # Print batch-level metrics
+                if self.is_master:
+                    print(
+                        f"[Epoch {epoch}] Batch:{num_batches} "
+                        f"Loss: {step_losses['total_loss']:.4f}, "
+                        f"KLL: {step_losses['kl_loss']:.4f}, "
+                        f"PruneL: {step_losses['prune_loss']:.4f}, "
+                        f"VertexL: {step_losses['vertex_loss']:.4f}, "
+                        f"EdgeL: {step_losses['edge_loss']:.4f}, "
+                        f"OffsetL: {step_losses['offset_loss']:.4f}, "
+                        f"DireL: {step_losses['direction_loss']:.4f}, "
+                        f"ConL: {step_losses['connection_loss']:.4f}, "
+                        f"LR: {self.optimizer.param_groups[0]['lr']:.4e} "
+                    )
+                    # if i % 2000 == 0 and i != 0:
+                    #     self.save_checkpoint(epoch, step_losses['total_loss'], i)
+            if num_batches % accum_steps != 0:
+                self.scaler.unscale_(self.optimizer)
+                torch.nn.utils.clip_grad_norm_(self.vae.parameters(), max_norm=1.0)
+                torch.nn.utils.clip_grad_norm_(self.voxel_encoder.parameters(), max_norm=1.0)
+                torch.nn.utils.clip_grad_norm_(self.connection_head.parameters(), max_norm=1.0)
+                self.scaler.step(self.optimizer)
+                self.scaler.update()
+                self.optimizer.zero_grad(set_to_none=True)
+                self.scheduler.step()
+            # Calculate epoch averages
+            avg_metrics = {key: value / num_batches for key, value in metrics.items()}
+            self.train_loss_history.append(avg_metrics['total_loss'])
+            # Log to file
+            if self.is_master:
+                with open(self.log_file, "a") as f:
+                    current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+                    log_line = (
+                        f"Epoch {epoch:05d} | "
+                        f"Loss: {avg_metrics['total_loss']:.6f} "
+                        f"Avg KLL: {avg_metrics['kl_loss']:.4f} "
+                        f"Avg PruneL: {avg_metrics['prune_loss']:.4f} "
+                        f"Avg VertexL: {avg_metrics['vertex_loss']:.4f} "
+                        f"Avg EdgeL: {avg_metrics['edge_loss']:.4f} "
+                        f"Avg OffsetL: {avg_metrics['offset_loss']:.4f} "
+                        f"Avg DireL: {avg_metrics['direction_loss']:.4f} "
+                        f"Avg ConL: {avg_metrics['connection_loss']:.4f} "
+                        f"LR: {self.optimizer.param_groups[0]['lr']:.4e} "
+                        f"[{current_time}]\n"
+                    )
+                    f.write(log_line)
+                # Print epoch summary
+                print(
+                    f"[Epoch {epoch}] "
+                    f"Avg Loss: {avg_metrics['total_loss']:.4f} "
+                    f"Avg KLL: {avg_metrics['kl_loss']:.4f} "
+                    f"Avg PruneL: {avg_metrics['prune_loss']:.4f} "
+                    f"Avg VertexL: {avg_metrics['vertex_loss']:.4f} "
+                    f"Avg EdgeL: {avg_metrics['edge_loss']:.4f} "
+                    f"Avg OffsetL: {avg_metrics['offset_loss']:.4f} "
+                    f"Avg DireL: {avg_metrics['direction_loss']:.4f} "
+                    f"Avg ConL: {avg_metrics['connection_loss']:.4f} "
+                    f"[{current_time}]\n"
+                )
+                # Save checkpoint
+                if epoch % self.cfg['training']['save_every'] == 0:
+                    self.save_checkpoint(epoch, avg_metrics['total_loss'], i)
+            # Update learning rate
+            if self.is_master:
+                current_lr = self.optimizer.param_groups[0]['lr']
+                print(f"Epoch {epoch}: Learning rate updated to {current_lr:.2e}")
+            dist.barrier()
+def main():
+    # Initialize the process group
+    dist.init_process_group(backend='nccl')
+    # Get rank and world size from environment variables set by the launcher
+    rank = int(os.environ['RANK'])
+    world_size = int(os.environ['WORLD_SIZE'])
+    local_rank = int(os.environ['LOCAL_RANK'])
+    # Set the device for the current process. This is crucial.
+    torch.cuda.set_device(local_rank)
+    torch.manual_seed(42)
+    # with torch.cuda.amp.autocast(dtype=torch.bfloat16):
+    # Pass the distributed info to the Trainer
+    trainer = Trainer(
+        config_path="/root/Trisf/config_edge_1024_error_8enc_8dec_woself_finetune_128to256.yaml",
+        rank=rank,
+        world_size=world_size,
+        local_rank=local_rank
+    )
+    trainer.train()
+    # Clean up the process group
+    dist.destroy_process_group()
+if __name__ == '__main__':
+    main()

train_slat_vae_512_128to512_pointnet_head.py ADDED Viewed

	@@ -0,0 +1,1090 @@

+import torch.nn as nn
+import os
+# os.environ['ATTN_BACKEND'] = 'xformers'
+import yaml
+import torch
+import time
+from datetime import datetime
+from torch.utils.data import DataLoader
+from functools import partial
+from triposf.modules.sparse.basic import SparseTensor
+import torch.nn.functional as F
+from torch.optim import AdamW
+from torch.cuda.amp import GradScaler, autocast
+from triposf.models.triposf_vae.VoxelFeatureVAE_edge_woself_128to1024_decoder_head import VoxelVAE
+from vertex_encoder import VoxelFeatureEncoder_active_pointnet, ConnectionHead
+from dataset_triposf_head import VoxelVertexDataset_edge, collate_fn_pointnet
+from utils import load_pretrained_woself, AdaptiveFocalLoss, fast_isin, AsymmetricFocalLoss, DiceLoss
+import torch.distributed as dist
+from torch.nn.parallel import DistributedDataParallel as DDP
+from torch.utils.data.distributed import DistributedSampler
+from transformers import get_cosine_schedule_with_warmup
+import math
+import numpy as np
+import open3d as o3d
+def export_sampled_edges(coords, u, v, labels, edge_voxels=None, step_idx=0, save_dir="debug_viz"):
+    """
+    导出顶点、采样的边以及背景边缘体素用于可视化 (PLY格式)
+    """
+    os.makedirs(save_dir, exist_ok=True)
+    # 转为 CPU numpy
+    coords_np = coords.detach().cpu().numpy()  # [N, 4] (b, x, y, z)
+    u_np = u.detach().cpu().numpy()
+    v_np = v.detach().cpu().numpy()
+    labels_np = labels.detach().cpu().numpy().flatten()
+    edge_voxels_np = None
+    if edge_voxels is not None:
+        edge_voxels_np = edge_voxels.detach().cpu().numpy() # [M, 4]
+    # 按 Batch 处理 (因为 visualization 最好单个物体看)
+    batch_ids = coords_np[:, 0]
+    unique_batches = np.unique(batch_ids)
+    for b_id in unique_batches:
+        # 1. 筛选当前 Batch 的顶点
+        mask_b = (batch_ids == b_id)
+        # 获取局部坐标 (x,y,z)
+        curr_verts = coords_np[mask_b, 1:4]
+        # 建立 全局索引 -> 局部索引 的映射
+        # u 和 v 是基于整个 batch 的全局索引，需要转换
+        global_indices = np.where(mask_b)[0]
+        # 创建一个大的映射数组 (假设索引范围不超过 max_len)
+        max_idx = np.max(global_indices) + 1
+        global_to_local = np.full(max_idx, -1)
+        global_to_local[global_indices] = np.arange(len(global_indices))
+        # 2. 筛选当前 Batch 相关的边
+        # 边的两个端点都必须属于当前 batch
+        # 只要检查 u 是否在当前 batch 范围内即可 (假设跨 batch 不连线)
+        batch_edge_mask = np.isin(u_np, global_indices)
+        curr_u_global = u_np[batch_edge_mask]
+        curr_v_global = v_np[batch_edge_mask]
+        curr_labels = labels_np[batch_edge_mask]
+        # 转为局部索引 (用于构建 mesh)
+        curr_u = global_to_local[curr_u_global]
+        curr_v = global_to_local[curr_v_global]
+        # 3. 筛选当前 Batch 的 Edge Voxels
+        curr_edge_voxels = None
+        if edge_voxels_np is not None:
+            mask_ev = (edge_voxels_np[:, 0] == b_id)
+            curr_edge_voxels = edge_voxels_np[mask_ev, 1:4]
+        # 4. 写入 PLY 文件
+        filename = os.path.join(save_dir, f"step_{step_idx}_batch_{int(b_id)}.ply")
+        write_ply_with_edges_and_voxels(
+            filename,
+            curr_verts,
+            curr_u,
+            curr_v,
+            curr_labels,
+            curr_edge_voxels
+        )
+        print(f"Saved visualization to {filename}")
+def write_ply_with_edges_and_voxels(filename, verts, u, v, labels, edge_voxels=None):
+    """
+    手动写入 PLY 文件，包含顶点、边和背景体素点。
+    为了区分，我们将它们合并到一个文件中，但使用颜色区分。
+    """
+    # ------------------------------------------------
+    # 准备数据
+    # ------------------------------------------------
+    # 1. 节点 (Vertices) -> 设为 青色
+    num_verts = len(verts)
+    vertex_colors = np.tile([0, 255, 255], (num_verts, 1))
+    # 2. 边缘体素 (Edge Voxels) -> 设为 灰色 (作为背景)
+    if edge_voxels is not None:
+        num_ev = len(edge_voxels)
+        # 为了放在同一个 vertex buffer，我们把 edge voxels 追加到 verts 后面
+        all_points = np.vstack([verts, edge_voxels])
+        ev_colors = np.tile([180, 180, 180], (num_ev, 1))
+        all_colors = np.vstack([vertex_colors, ev_colors])
+    else:
+        all_points = verts
+        all_colors = vertex_colors
+        num_ev = 0
+    num_total_points = len(all_points)
+    # 3. 边 (Edges)
+    # PLY 格式的 edge 索引是基于当前点列表的
+    # u, v 已经是基于 verts 的局部索引了，不需要偏移 (因为 verts 在最前面)
+    # 筛选：我们通常只想看 Positive 的边 (labels==1)，或者用颜色区分
+    # 这里全部写入，用颜色区分
+    # 红色 = 负样本 (预测连接但实际上没连/负采样)
+    # 绿色 = 正样本 (GT连接)
+    edge_list = np.stack([u, v], axis=1)
+    num_edges = len(edge_list)
+    edge_colors = np.zeros((num_edges, 3), dtype=int)
+    edge_colors[labels == 1] = [0, 255, 0]   # Green for Positive
+    edge_colors[labels == 0] = [255, 0, 0]   # Red for Negative
+    # ------------------------------------------------
+    # 写入文件 Header
+    # ------------------------------------------------
+    with open(filename, 'w') as f:
+        f.write("ply\n")
+        f.write("format ascii 1.0\n")
+        # Vertex Element (包含 Nodes 和 EdgeVoxels)
+        f.write(f"element vertex {num_total_points}\n")
+        f.write("property float x\n")
+        f.write("property float y\n")
+        f.write("property float z\n")
+        f.write("property uchar red\n")
+        f.write("property uchar green\n")
+        f.write("property uchar blue\n")
+        # Edge Element (包含采样的连接)
+        f.write(f"element edge {num_edges}\n")
+        f.write("property list uchar int vertex_indices\n")
+        f.write("property uchar red\n")
+        f.write("property uchar green\n")
+        f.write("property uchar blue\n")
+        f.write("end_header\n")
+        # ------------------------------------------------
+        # 写入数据 Body
+        # ------------------------------------------------
+        # 1. Write Points (Vertices + Edge Voxels)
+        for i in range(num_total_points):
+            p = all_points[i]
+            c = all_colors[i]
+            f.write(f"{p[0]:.4f} {p[1]:.4f} {p[2]:.4f} {int(c[0])} {int(c[1])} {int(c[2])}\n")
+        # 2. Write Edges
+        for i in range(num_edges):
+            e = edge_list[i]
+            c = edge_colors[i]
+            f.write(f"2 {int(e[0])} {int(e[1])} {int(c[0])} {int(c[1])} {int(c[2])}\n")
+def flatten_coords_4d(coords_4d: torch.Tensor):
+    coords_4d_long = coords_4d.long()
+    base_x = 512
+    base_y = 512 * 512
+    base_z = 512 * 512 * 512
+    flat_coords = coords_4d_long[:, 0] * base_z + \
+                  coords_4d_long[:, 1] * base_y + \
+                  coords_4d_long[:, 2] * base_x + \
+                  coords_4d_long[:, 3]
+    return flat_coords
+def downsample_voxels(
+    voxels: torch.Tensor,
+    input_resolution: int,
+    output_resolution: int
+) -> torch.Tensor:
+    """Map voxel rows onto a coarser grid and drop duplicate rows.
+
+    Column 0 is left untouched; every later column is floor-divided by
+    ``input_resolution // output_resolution``. The input tensor is not modified.
+
+    Args:
+        voxels: 2-D tensor of voxel rows (used in this file as ``(batch, x, y, z)``).
+        input_resolution: Grid resolution the rows are expressed in.
+        output_resolution: Target resolution; must divide ``input_resolution``.
+
+    Returns:
+        int64 tensor of the unique coarse rows, as returned by ``torch.unique(dim=0)``.
+
+    Raises:
+        ValueError: If ``input_resolution`` is not a multiple of ``output_resolution``.
+    """
+    scale, leftover = divmod(input_resolution, output_resolution)
+    if leftover != 0:
+        raise ValueError(f"input_resolution ({input_resolution}) must be divisible "
+                         f"by output_resolution ({output_resolution}).")
+    rows = voxels.to(torch.long)
+    # Build a fresh tensor instead of dividing in place, so the caller's data is never touched.
+    coarse_rows = torch.cat([rows[:, :1], rows[:, 1:] // scale], dim=1)
+    return torch.unique(coarse_rows, dim=0)
+class Trainer:
+    def __init__(self, config_path, rank, world_size, local_rank):
+        """Set up one distributed training worker.
+
+        Args:
+            config_path: Path of the YAML configuration handed to ``load_config``.
+            rank: Global rank of this process; rank 0 is treated as the master.
+            world_size: Number of participating processes.
+            local_rank: Index used to select this process's CUDA device.
+        """
+        self.rank, self.world_size, self.local_rank = rank, world_size, local_rank
+        self.is_master = (rank == 0)
+        # load_config reads self.is_master, so it has to run after the line above.
+        self.load_config(config_path)
+        per_step_batch = self.cfg['training']['batch_size']
+        # Accumulate gradients over 4 // batch_size micro-batches, never fewer than one.
+        self.accum_steps = max(1, 4 // per_step_batch)
+        self.config_hash = self.save_config_with_hash()
+        # The setup stages run in this fixed order.
+        for setup_stage in (self.init_device, self.init_dirs, self.init_components, self.init_training):
+            setup_stage()
+        self.train_loss_history = []
+        self.eval_loss_history = []
+        self.best_eval_loss = float('inf')
+    def save_config_with_hash(self):
+        """Hash the config, resolve its ``save_dir`` template and write ``config.yaml``.
+
+        The hash is taken from the YAML dump of ``self.cfg`` before the
+        ``save_dir`` entry is rewritten. Only the master rank touches the
+        filesystem; every rank then waits on a barrier.
+
+        Returns:
+            The first 8 hex characters of the MD5 digest of the dumped config.
+        """
+        import hashlib
+        cfg = self.cfg
+        digest = hashlib.md5(yaml.dump(cfg).encode()).hexdigest()
+        config_hash = digest[:8]
+        model_cfg = cfg['model']
+        # Values available to the save_dir template; booleans are rendered as "True"/"False".
+        placeholders = {
+            'dataset_name': os.path.basename(cfg['dataset']['path']),
+            'config_hash': config_hash,
+            'n_train_samples': cfg['dataset']['n_train_samples'],
+            'multires': model_cfg['multires'],
+            'add_block_embed': str(bool(model_cfg['add_block_embed'])),
+            'using_attn': str(bool(model_cfg['using_attn'])),
+            'batch_size': cfg['training']['batch_size'],
+        }
+        experiment_cfg = cfg['experiment']
+        experiment_cfg['save_dir'] = experiment_cfg['save_dir'].format(**placeholders)
+        if self.is_master:
+            # Note: the directory used here is self.save_dir, not the entry rewritten above.
+            os.makedirs(self.save_dir, exist_ok=True)
+            with open(os.path.join(self.save_dir, "config.yaml"), 'w') as f:
+                yaml.dump(cfg, f)
+        dist.barrier()
+        return config_hash
+    def save_checkpoint(self, epoch, avg_loss, batch_idx):
+        """Write model weights and the current config to ``self.save_dir`` (master rank only).
+
+        The checkpoint holds the unwrapped (``.module``) state dicts of the voxel
+        encoder, the VAE and the connection head, plus ``epoch``, ``loss`` and the
+        config. Optimizer and scheduler state are not stored.
+
+        Args:
+            epoch: Epoch index, stored and used in the file name.
+            avg_loss: Loss value, stored and formatted to 4 decimals in the file name.
+            batch_idx: Batch index, used in the file name only.
+        """
+        if not self.is_master:
+            return
+        checkpoint_path = os.path.join(self.save_dir, f"checkpoint_epoch{epoch}_batch{batch_idx}_loss{avg_loss:.4f}.pt")
+        config_path = os.path.join(self.save_dir, "config.yaml")
+        torch.save({
+            'voxel_encoder': self.voxel_encoder.module.state_dict(),
+            'vae': self.vae.module.state_dict(),
+            'connection_head': self.connection_head.module.state_dict(),
+            'epoch': epoch,
+            'loss': avg_loss,
+            'config': self.cfg
+        }, checkpoint_path)
+        # Register the quoting representer on a private Dumper subclass. The module-level
+        # yaml.add_representer would alter the shared default Dumper and thereby the
+        # output of every later yaml.dump call in this process.
+        class _QuotedStrDumper(yaml.Dumper):
+            """Dumper that emits every ``str`` scalar in double quotes."""
+        def quoted_presenter(dumper, data):
+            return dumper.represent_scalar('tag:yaml.org,2002:str', data, style='"')
+        _QuotedStrDumper.add_representer(str, quoted_presenter)
+        with open(config_path, 'w') as f:
+            yaml.dump(self.cfg, f, Dumper=_QuotedStrDumper)
+    def load_config(self, config_path):
+        """Read the YAML config into ``self.cfg`` and derive ``self.save_dir``.
+
+        ``save_dir`` comes from formatting the ``experiment.save_dir`` template
+        with values taken from the config. The master rank creates the directory,
+        then all ranks synchronise on a barrier.
+
+        Args:
+            config_path: Path of the YAML file to load.
+        """
+        with open(config_path) as f:
+            self.cfg = yaml.safe_load(f)
+        cfg = self.cfg
+        model_cfg = cfg['model']
+        # Booleans are rendered as the literal strings "True" / "False" in the path.
+        block_embed_text = str(bool(model_cfg['add_block_embed']))
+        attn_text = str(bool(model_cfg['using_attn']))
+        save_dir_template = cfg['experiment']['save_dir']
+        self.save_dir = save_dir_template.format(
+            dataset_name=os.path.basename(cfg['dataset']['path']),
+            n_train_samples=cfg['dataset']['n_train_samples'],
+            multires=model_cfg['multires'],
+            add_block_embed=block_embed_text,
+            using_attn=attn_text,
+            batch_size=cfg['training']['batch_size'],
+        )
+        if self.is_master:
+            os.makedirs(self.save_dir, exist_ok=True)
+        dist.barrier()
+    def init_device(self):
+        """Store the CUDA device indexed by ``self.local_rank`` in ``self.device``."""
+        device_name = "cuda:{}".format(self.local_rank)
+        self.device = torch.device(device_name)
+    def init_dirs(self):
+        """Set ``self.log_file`` and, on the master rank, append a start-up line to it.
+
+        The log file lives in ``self.save_dir`` and carries the configured
+        learning rate in its name.
+        """
+        log_name = "training_log_{}.txt".format(self.cfg['training']['lr'])
+        self.log_file = os.path.join(self.save_dir, log_name)
+        if not self.is_master:
+            return
+        with open(self.log_file, "a") as log:
+            stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+            log.write(f"[{stamp}] Config loaded for distributed training with world size {self.world_size}\n")
+    def init_components(self):
+        """Create the dataset, its distributed loader and the three DDP-wrapped networks.
+
+        Builds ``self.dataset``, ``self.sampler``, ``self.dataloader``,
+        ``self.voxel_encoder``, ``self.connection_head`` and ``self.vae``. When
+        ``training.from_pretrained`` is set, weights are loaded before the modules
+        are wrapped in ``DistributedDataParallel``.
+        """
+        self.dataset = VoxelVertexDataset_edge(
+            root_dir=self.cfg['dataset']['path'],
+            base_resolution=self.cfg['dataset']['base_resolution'],
+            min_resolution=self.cfg['dataset']['min_resolution'],
+            cache_dir=self.cfg['dataset']['cache_dir'],
+            renders_dir=self.cfg['dataset']['renders_dir'],
+            filter_active_voxels=self.cfg['dataset']['filter_active_voxels'],
+            cache_filter_path=self.cfg['dataset']['cache_filter_path'],
+            active_voxel_res=128,
+            pc_sample_number=819200,
+            sample_type=self.cfg['dataset']['sample_type'],
+        )
+        # Shuffling is delegated to the sampler, hence shuffle=False on the loader below.
+        self.sampler = DistributedSampler(
+            self.dataset,
+            num_replicas=self.world_size,
+            rank=self.rank,
+            shuffle=True,
+        )
+        num_workers = self.cfg['training']['num_workers']
+        self.dataloader = DataLoader(
+            self.dataset,
+            batch_size=self.cfg['training']['batch_size'],
+            shuffle=False,
+            collate_fn=partial(collate_fn_pointnet,),
+            num_workers=num_workers,
+            pin_memory=True,
+            sampler=self.sampler,
+            # prefetch_factor=4,
+            # DataLoader rejects persistent_workers=True when num_workers == 0, so only
+            # request persistent workers when worker processes actually exist.
+            persistent_workers=num_workers > 0,
+        )
+        self.voxel_encoder = VoxelFeatureEncoder_active_pointnet(
+            in_channels=15,
+            hidden_dim=256,
+            out_channels=1024,
+            scatter_type='mean',
+            n_blocks=5,
+            resolution=128,
+            add_label=False,
+        ).to(self.device)
+        self.connection_head = ConnectionHead(
+            channels=128 * 2,
+            out_channels=1,
+            mlp_ratio=4,
+        ).to(self.device)
+        # ablation 3: voxelvae_1volume, have tested
+        self.vae = VoxelVAE(
+            in_channels=self.cfg['model']['in_channels'],
+            latent_dim=self.cfg['model']['latent_dim'],
+            encoder_blocks=self.cfg['model']['encoder_blocks'],
+            decoder_blocks_vtx=self.cfg['model']['decoder_blocks_vtx'],
+            decoder_blocks_edge=self.cfg['model']['decoder_blocks_edge'],
+            num_heads=8,
+            num_head_channels=64,
+            mlp_ratio=4.0,
+            attn_mode="swin",
+            window_size=8,
+            pe_mode="ape",
+            use_fp16=False,
+            use_checkpoint=True,
+            qk_rms_norm=False,
+            using_subdivide=True,
+            using_attn=self.cfg['model']['using_attn'],
+            attn_first=self.cfg['model'].get('attn_first', True),
+            pred_direction=self.cfg['model'].get('pred_direction', False),
+        ).to(self.device)
+        # Load pretrained weights into the bare modules, before DDP wrapping.
+        if self.cfg['training']['from_pretrained']:
+            load_pretrained_woself(
+                checkpoint_path=self.cfg['training']['checkpoint_path'],
+                voxel_encoder=self.voxel_encoder,
+                vae=self.vae,
+                connection_head=self.connection_head,
+                optimizer=None,
+            )
+        self.voxel_encoder = DDP(self.voxel_encoder, device_ids=[self.local_rank], find_unused_parameters=False)
+        self.connection_head = DDP(self.connection_head, device_ids=[self.local_rank], find_unused_parameters=False)
+        self.vae = DDP(self.vae, device_ids=[self.local_rank], find_unused_parameters=False)
+    def init_training(self):
+        """Create the optimizer, the LR schedule, the loss modules and the grad scaler.
+
+        One AdamW instance covers the parameters of the VAE, the voxel encoder and
+        the connection head (in that order). The cosine schedule with 200 warm-up
+        steps is sized in optimizer updates, i.e. loader batches divided by
+        ``self.accum_steps`` and rounded up, times ``training.max_epochs``.
+        """
+        train_cfg = self.cfg['training']
+        trainable_params = [
+            param
+            for wrapped in (self.vae, self.voxel_encoder, self.connection_head)
+            for param in wrapped.module.parameters()
+        ]
+        self.optimizer = AdamW(trainable_params, lr=train_cfg['lr'], weight_decay=0.01)
+        updates_per_epoch = math.ceil(len(self.dataloader) / self.accum_steps)
+        total_updates = train_cfg['max_epochs'] * updates_per_epoch
+        self.scheduler = get_cosine_schedule_with_warmup(
+            self.optimizer,
+            num_warmup_steps=200,
+            num_training_steps=total_updates,
+        )
+        # Loss modules; train_step as written uses focal_loss and asyloss.
+        self.focal_loss = AdaptiveFocalLoss(gamma=2.0, max_alpha=10.0).to(self.device)
+        self.mse_loss = nn.MSELoss(reduction='mean').to(self.device)
+        self.asyloss = AsymmetricFocalLoss(gamma_pos=0.0, gamma_neg=4.0, clip=0.05)
+        self.bce_loss = torch.nn.BCEWithLogitsLoss()
+        self.dice_loss = DiceLoss()
+        self.scaler = GradScaler()
+    def train_step(self, batch):
+        """Run forward and backward for one micro-batch and return scalar loss logs.
+
+        The total loss is the sum of: an asymmetric focal loss on the vertex and
+        edge masks of ``results[0]``; a focal pruning loss for the vertex and edge
+        branch of every later entry of ``results``; a vertex connection loss taken
+        at result index 2; and a KL term weighted by 1e-3. The loss is divided by
+        ``self.accum_steps`` and back-propagated through ``self.scaler``; no
+        optimizer step is taken here.
+
+        Args:
+            batch: Mapping with tensor entries ``combined_voxels_512``,
+                ``gt_vertex_voxels_512``, ``gt_vertex_edge_indices_512``,
+                ``active_voxels_128`` and ``point_cloud_128``.
+
+        Returns:
+            dict of Python floats with keys ``total_loss``, ``kl_loss``,
+            ``prune_loss``, ``vertex_loss``, ``edge_loss``, ``offset_loss``,
+            ``direction_loss`` and ``connection_loss``. ``offset_loss`` and
+            ``direction_loss`` are always 0 because those terms are not computed.
+        """
+        combined_voxels_512 = batch['combined_voxels_512'].to(self.device)
+        gt_vertex_voxels_512 = batch['gt_vertex_voxels_512'].to(self.device)
+        gt_edges = batch['gt_vertex_edge_indices_512'].to(self.device)
+        active_coords = batch['active_voxels_128'].to(self.device)
+        point_cloud = batch['point_cloud_128'].to(self.device)
+        # Ground-truth voxel sets at 128, 256 and 512 resolution, coarse to fine.
+        gt_vertex_voxels_list = [
+            downsample_voxels(gt_vertex_voxels_512, input_resolution=512, output_resolution=128),
+            downsample_voxels(gt_vertex_voxels_512, input_resolution=512, output_resolution=256),
+            gt_vertex_voxels_512,
+        ]
+        gt_edge_voxels_list = [
+            downsample_voxels(combined_voxels_512, input_resolution=512, output_resolution=128),
+            downsample_voxels(combined_voxels_512, input_resolution=512, output_resolution=256),
+            combined_voxels_512,
+        ]
+        # Bind every logged quantity up front so the return below is valid even when
+        # `results` has fewer than three entries and the branches that set them never run.
+        prune_loss_total = 0.
+        mse_loss_feats = torch.tensor(0., device=self.device)
+        mse_loss_dirs = torch.tensor(0., device=self.device)
+        connection_loss = torch.tensor(0., device=self.device)
+        with torch.cuda.amp.autocast(dtype=torch.bfloat16):
+            active_voxel_feats = self.voxel_encoder(
+                p=point_cloud,
+                sparse_coords=active_coords,
+                res=128,
+                bbox_size=(-0.5, 0.5),
+            )
+            sparse_input = SparseTensor(
+                feats=active_voxel_feats,
+                coords=active_coords.int()
+            )
+            results, posterior, _latent_128 = self.vae(
+                sparse_input,
+                gt_vertex_voxels_list=gt_vertex_voxels_list,
+                gt_edge_voxels_list=gt_edge_voxels_list,
+                training=True,
+                sample_ratio=0.,
+            )
+            # First entry: vertex / edge logits scored against the masks it carries.
+            initial_result = results[0]
+            vertex_loss_total = self.asyloss(
+                initial_result['vtx_feats'].squeeze(-1),
+                initial_result['vertex_mask'].float(),
+            )
+            edge_loss_total = self.asyloss(
+                initial_result['edge_feats'].squeeze(-1),
+                initial_result['edge_mask'].float(),
+            )
+            total_loss = vertex_loss_total + edge_loss_total
+            # Later entries: pruning loss for the vertex branch, then the edge branch.
+            for idx, res_dict in enumerate(results[1:], start=1):
+                for branch in ('vertex', 'edge'):
+                    branch_out = res_dict[branch]
+                    # presumably flags which predicted coords are present in the GT coords -- confirm in fast_isin
+                    labels = fast_isin(branch_out['occ_coords'], branch_out['coords'], resolution=512).float()
+                    branch_prune_loss = self.focal_loss(branch_out['occ_probs'].squeeze(), labels)
+                    prune_loss_total = prune_loss_total + branch_prune_loss
+                    total_loss = total_loss + branch_prune_loss
+                if idx == 2:
+                    level_connection_loss = self._connection_loss(
+                        vtx_pred_coords=res_dict['vertex']['coords_4d'],
+                        vtx_pred_feats=res_dict['vertex']['feats'],
+                        gt_vertex_voxels=gt_vertex_voxels_512,
+                        gt_edges=gt_edges,
+                    )
+                    # None means no GT edge had both endpoints among the predicted vertices.
+                    if level_connection_loss is not None:
+                        connection_loss = level_connection_loss
+                        total_loss = total_loss + connection_loss
+            # KL loss
+            kl_loss = posterior.kl(dims=(1,)).mean() * 1e-3
+            total_loss = total_loss + kl_loss
+        # Backpropagation; dividing by accum_steps averages over the accumulated micro-batches.
+        scaled_total_loss = total_loss / self.accum_steps
+        self.scaler.scale(scaled_total_loss).backward()
+        # float() accepts both 0-dim tensors and plain Python floats.
+        return {
+            'total_loss': float(total_loss),
+            'kl_loss': float(kl_loss),
+            'prune_loss': float(prune_loss_total),
+            'vertex_loss': float(vertex_loss_total),
+            'edge_loss': float(edge_loss_total),
+            'offset_loss': float(mse_loss_feats),
+            'direction_loss': float(mse_loss_dirs),
+            'connection_loss': float(connection_loss),
+        }
+    def _connection_loss(self, vtx_pred_coords, vtx_pred_feats, gt_vertex_voxels, gt_edges):
+        """Score sampled vertex pairs with the connection head against the GT edges.
+
+        GT vertices are matched to predicted vertices by exact coordinate key. GT
+        edges whose two endpoints were both matched are the positives; KNN and
+        random pairs from ``_sample_candidate_pairs`` supply the other candidates.
+
+        Args:
+            vtx_pred_coords: ``[N, 4]`` predicted vertex coordinates.
+            vtx_pred_feats: ``[N, C]`` features, row-aligned with ``vtx_pred_coords``.
+            gt_vertex_voxels: ``[M, 4]`` ground-truth vertex coordinates.
+            gt_edges: ``[E, 2]`` integer indices into ``gt_vertex_voxels``.
+
+        Returns:
+            The asymmetric focal loss over the de-duplicated pairs, or ``None``
+            when no GT edge has both endpoints among the predictions.
+        """
+        num_total_nodes = vtx_pred_coords.shape[0]
+        # Sort predicted keys once so GT keys can be located by binary search.
+        pred_keys_sorted, pred_order = torch.sort(flatten_coords_4d(vtx_pred_coords))
+        gt_keys = flatten_coords_4d(gt_vertex_voxels.to(self.device))
+        # gt_to_pred[i] = row of the prediction matching GT vertex i, or -1 if unmatched.
+        gt_to_pred = torch.full((len(gt_keys),), -1, device=self.device, dtype=torch.long)
+        if num_total_nodes > 0:
+            # Skipped for an empty prediction set, where the clamp bound would be -1.
+            pos = torch.searchsorted(pred_keys_sorted, gt_keys).clamp(max=num_total_nodes - 1)
+            matched = pred_keys_sorted[pos] == gt_keys
+            gt_to_pred[matched] = pred_order[pos[matched]]
+        u_pred = gt_to_pred[gt_edges[:, 0]]
+        v_pred = gt_to_pred[gt_edges[:, 1]]
+        both_matched = (u_pred != -1) & (v_pred != -1)
+        real_pos_u = u_pred[both_matched]
+        real_pos_v = v_pred[both_matched]
+        if real_pos_u.shape[0] == 0:
+            # NOTE(review): connection_head is not run on this rank in this case; with
+            # DDP(find_unused_parameters=False) this may desynchronise gradient
+            # reduction across ranks -- confirm.
+            return None
+        cand_u_list, cand_v_list = self._sample_candidate_pairs(vtx_pred_coords)
+        # The positives are always part of the candidate set.
+        all_u = torch.cat([real_pos_u] + cand_u_list)
+        all_v = torch.cat([real_pos_v] + cand_v_list)
+        # Undirected pair key: min * base + max, with base larger than any node index.
+        hash_base = num_total_nodes + 100
+        p_min = torch.min(all_u, all_v)
+        p_max = torch.max(all_u, all_v)
+        not_self_loop = p_min != p_max
+        # De-duplicate: KNN, random and GT pairs can overlap.
+        pair_hashes = torch.unique(p_min[not_self_loop].long() * hash_base + p_max[not_self_loop].long())
+        final_u = pair_hashes // hash_base
+        final_v = pair_hashes % hash_base
+        # torch.unique returns sorted values by default, so the result is ready for searchsorted.
+        gt_hashes = torch.unique(
+            torch.min(real_pos_u, real_pos_v).long() * hash_base
+            + torch.max(real_pos_u, real_pos_v).long()
+        )
+        slot = torch.searchsorted(gt_hashes, pair_hashes).clamp(max=len(gt_hashes) - 1)
+        targets = (gt_hashes[slot] == pair_hashes).float().unsqueeze(-1)
+        feat_u = vtx_pred_feats[final_u]
+        feat_v = vtx_pred_feats[final_v]
+        # Evaluate both orderings and sum, so the score does not depend on pair order.
+        logits = (
+            self.connection_head(torch.cat([feat_u, feat_v], dim=-1))
+            + self.connection_head(torch.cat([feat_v, feat_u], dim=-1))
+        )
+        return self.asyloss(logits, targets)
+    def _sample_candidate_pairs(self, vtx_pred_coords, k_knn=64, k_random=32, max_pts_for_knn=12000):
+        """Propose candidate vertex pairs within each batch item (KNN plus random).
+
+        Args:
+            vtx_pred_coords: ``[N, 4]`` coordinates; column 0 is the batch id and
+                columns 1..3 are used as the position.
+            k_knn: Nearest neighbours proposed per vertex.
+            k_random: Random pairs drawn per vertex.
+            max_pts_for_knn: Batch items with more vertices than this skip the KNN
+                part, because it builds a dense pairwise distance matrix.
+
+        Returns:
+            Two equally long lists of index tensors (pair sources, pair targets),
+            indexing rows of ``vtx_pred_coords``. Pairs may repeat or be self-loops.
+        """
+        cand_u_list = []
+        cand_v_list = []
+        batch_ids = vtx_pred_coords[:, 0]
+        for b_id in torch.unique(batch_ids):
+            mask_b = (batch_ids == b_id)
+            indices_b = torch.nonzero(mask_b).squeeze(-1)  # rows of this batch item in the full tensor
+            coords_b = vtx_pred_coords[mask_b, 1:4].float()
+            num_b = coords_b.shape[0]
+            if num_b < 2:
+                continue
+            # --- A. K nearest neighbours ---
+            if num_b <= max_pts_for_knn:
+                # Dense [num_b, num_b] distances; 12000 x 12000 float32 values are about 576 MB.
+                dist_mat = torch.cdist(coords_b, coords_b)
+                k_val = min(k_knn + 1, num_b)
+                _, knn_local = torch.topk(dist_mat, k=k_val, dim=1, largest=False)
+                # Drop the first column, which is normally the point itself at distance 0.
+                knn_local = knn_local[:, 1:]
+                src_local = torch.arange(num_b, device=self.device).repeat_interleave(knn_local.shape[1])
+                cand_u_list.append(indices_b[src_local])
+                cand_v_list.append(indices_b[knn_local.flatten()])
+            # --- B. Uniform random pairs, drawn regardless of the item's size ---
+            n_rand = num_b * k_random
+            rand_src = torch.randint(0, num_b, (n_rand,), device=self.device)
+            rand_dst = torch.randint(0, num_b, (n_rand,), device=self.device)
+            cand_u_list.append(indices_b[rand_src])
+            cand_v_list.append(indices_b[rand_dst])
+        return cand_u_list, cand_v_list
+    def train(self):
+        accum_steps = self.accum_steps
+        for epoch in range(self.cfg['training']['start_epoch'], self.cfg['training']['max_epochs']):
+            self.dataloader.sampler.set_epoch(epoch)
+            # Initialize metrics
+            metrics = {
+                'total_loss': 0.0,
+                'kl_loss': 0.0,
+                'prune_loss': 0.0,
+                'vertex_loss': 0.0,
+                'edge_loss': 0.0,
+                'offset_loss': 0.0,
+                'direction_loss': 0.0,
+                'connection_loss': 0.0,
+            }
+            num_batches = 0
+            self.optimizer.zero_grad(set_to_none=True)
+            for i, batch in enumerate(self.dataloader):
+                # Get all losses from train_step
+                if batch is None:
+                    continue
+                step_losses = self.train_step(batch)
+                # Accumulate losses
+                for key in metrics:
+                    metrics[key] += step_losses[key]
+                num_batches += 1
+                if (i + 1) % accum_steps == 0:
+                    self.scaler.unscale_(self.optimizer)
+                    torch.nn.utils.clip_grad_norm_(self.vae.parameters(), max_norm=1.0)
+                    torch.nn.utils.clip_grad_norm_(self.voxel_encoder.parameters(), max_norm=1.0)
+                    torch.nn.utils.clip_grad_norm_(self.connection_head.parameters(), max_norm=1.0)
+                    self.scaler.step(self.optimizer)
+                    self.scaler.update()
+                    self.optimizer.zero_grad(set_to_none=True)
+                    self.scheduler.step()
+                # Print batch-level metrics
+                if self.is_master:
+                    avg_metric = {key: value / num_batches for key, value in metrics.items()}
+                    print(
+                        f"[Epoch {epoch}] Batch:{num_batches} "
+                        f"AvgL:{avg_metric['total_loss']:.4f} "
+                        f"Loss: {step_losses['total_loss']:.4f}, "
+                        f"KLL: {step_losses['kl_loss']:.4f}, "
+                        f"PruneL: {step_losses['prune_loss']:.4f}, "
+                        f"VertexL: {step_losses['vertex_loss']:.4f}, "
+                        f"EdgeL: {step_losses['edge_loss']:.4f}, "
+                        f"OffsetL: {step_losses['offset_loss']:.4f}, "
+                        f"DireL: {step_losses['direction_loss']:.4f}, "
+                        f"ConL: {step_losses['connection_loss']:.4f}, "
+                        f"LR: {self.optimizer.param_groups[0]['lr']:.4e} "
+                    )
+                    if i % 2000 == 0 and i != 0:
+                        self.save_checkpoint(epoch, avg_metric['total_loss'], i)
+                        with open(self.log_file, "a") as f:
+                            current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+                            log_line = (
+                                f"Epoch {epoch:05d} | "
+                                f"Batch {i:05d} | "
+                                f"Loss: {avg_metric['total_loss']:.6f} "
+                                f"Avg KLL: {avg_metric['kl_loss']:.4f} "
+                                f"Avg PruneL: {avg_metric['prune_loss']:.4f} "
+                                f"Avg VertexL: {avg_metric['vertex_loss']:.4f} "
+                                f"Avg EdgeL: {avg_metric['edge_loss']:.4f} "
+                                f"Avg OffsetL: {avg_metric['offset_loss']:.4f} "
+                                f"Avg DireL: {avg_metric['direction_loss']:.4f} "
+                                f"Avg ConL: {avg_metric['connection_loss']:.4f} "
+                                f"LR: {self.optimizer.param_groups[0]['lr']:.4e} "
+                                f"[{current_time}]\n"
+                            )
+                            f.write(log_line)
+            if num_batches % accum_steps != 0:
+                self.scaler.unscale_(self.optimizer)
+                torch.nn.utils.clip_grad_norm_(self.vae.parameters(), max_norm=1.0)
+                torch.nn.utils.clip_grad_norm_(self.voxel_encoder.parameters(), max_norm=1.0)
+                torch.nn.utils.clip_grad_norm_(self.connection_head.parameters(), max_norm=1.0)
+                self.scaler.step(self.optimizer)
+                self.scaler.update()
+                self.optimizer.zero_grad(set_to_none=True)
+                self.scheduler.step()
+            # Calculate epoch averages
+            avg_metrics = {key: value / num_batches for key, value in metrics.items()}
+            self.train_loss_history.append(avg_metrics['total_loss'])
+            # Log to file
+            if self.is_master:
+                with open(self.log_file, "a") as f:
+                    current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+                    log_line = (
+                        f"Epoch {epoch:05d} | "
+                        f"Loss: {avg_metrics['total_loss']:.6f} "
+                        f"Avg KLL: {avg_metrics['kl_loss']:.4f} "
+                        f"Avg PruneL: {avg_metrics['prune_loss']:.4f} "
+                        f"Avg VertexL: {avg_metrics['vertex_loss']:.4f} "
+                        f"Avg EdgeL: {avg_metrics['edge_loss']:.4f} "
+                        f"Avg OffsetL: {avg_metrics['offset_loss']:.4f} "
+                        f"Avg DireL: {avg_metrics['direction_loss']:.4f} "
+                        f"Avg ConL: {avg_metrics['connection_loss']:.4f} "
+                        f"LR: {self.optimizer.param_groups[0]['lr']:.4e} "
+                        f"[{current_time}]\n"
+                    )
+                    f.write(log_line)
+                # Print epoch summary
+                print(
+                    f"[Epoch {epoch}] "
+                    f"Avg Loss: {avg_metrics['total_loss']:.4f} "
+                    f"Avg KLL: {avg_metrics['kl_loss']:.4f} "
+                    f"Avg PruneL: {avg_metrics['prune_loss']:.4f} "
+                    f"Avg VertexL: {avg_metrics['vertex_loss']:.4f} "
+                    f"Avg EdgeL: {avg_metrics['edge_loss']:.4f} "
+                    f"Avg OffsetL: {avg_metrics['offset_loss']:.4f} "
+                    f"Avg DireL: {avg_metrics['direction_loss']:.4f} "
+                    f"Avg ConL: {avg_metrics['connection_loss']:.4f} "
+                    f"[{current_time}]\n"
+                )
+                # Save checkpoint
+                if epoch % self.cfg['training']['save_every'] == 0:
+                    self.save_checkpoint(epoch, avg_metrics['total_loss'], i)
+            # Update learning rate
+            if self.is_master:
+                current_lr = self.optimizer.param_groups[0]['lr']
+                print(f"Epoch {epoch}: Learning rate updated to {current_lr:.2e}")
+            dist.barrier()
+def main():
+    # Initialize the process group
+    dist.init_process_group(backend='nccl')
+    # Get rank and world size from environment variables set by the launcher
+    rank = int(os.environ['RANK'])
+    world_size = int(os.environ['WORLD_SIZE'])
+    local_rank = int(os.environ['LOCAL_RANK'])
+    # Set the device for the current process. This is crucial.
+    torch.cuda.set_device(local_rank)
+    torch.manual_seed(42+rank)
+    # with torch.cuda.amp.autocast(dtype=torch.bfloat16):
+    # Pass the distributed info to the Trainer
+    trainer = Trainer(
+        config_path="/home/tiger/yy/src/Michelangelo-master/config_edge_1024_error_8enc_8dec_woself_finetune_128to512.yaml",
+        rank=rank,
+        world_size=world_size,
+        local_rank=local_rank
+    )
+    trainer.train()
+    # Clean up the process group
+    dist.destroy_process_group()
+# Script entry point. main() reads RANK / WORLD_SIZE / LOCAL_RANK from the
+# environment, so this is presumably meant to be started through a distributed
+# launcher that sets them (e.g. torchrun) — confirm against the launch scripts.
+if __name__ == '__main__':
+    main()

trellis/__init__.py ADDED Viewed

	@@ -0,0 +1,6 @@

+from . import models
+from . import modules
+from . import pipelines
+from . import renderers
+from . import representations
+from . import utils

trellis/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (344 Bytes). View file

trellis/datasets/__init__.py ADDED Viewed

	@@ -0,0 +1,58 @@

+import importlib
+# Maps each lazily exported dataset class name to the sibling module (relative
+# to this package) that __getattr__ imports it from on first access.
+__attributes = {
+    'SparseStructure': 'sparse_structure',
+    'SparseFeat2Render': 'sparse_feat2render',
+    'SLat2Render':'structured_latent2render',
+    'Slat2RenderGeo':'structured_latent2render',
+    'SparseStructureLatent': 'sparse_structure_latent',
+    'TextConditionedSparseStructureLatent': 'sparse_structure_latent',
+    'ImageConditionedSparseStructureLatent': 'sparse_structure_latent',
+    'SLat': 'structured_latent',
+    'TextConditionedSLat': 'structured_latent',
+    'ImageConditionedSLat': 'structured_latent',
+}
+# Submodules that __getattr__ may import by their own name; currently none.
+__submodules = []
+# Public names of the package: every lazily exported class plus the submodules.
+__all__ = list(__attributes.keys()) + __submodules
+def __getattr__(name):
+    if name not in globals():
+        if name in __attributes:
+            module_name = __attributes[name]
+            module = importlib.import_module(f".{module_name}", __name__)
+            globals()[name] = getattr(module, name)
+        elif name in __submodules:
+            module = importlib.import_module(f".{name}", __name__)
+            globals()[name] = module
+        else:
+            raise AttributeError(f"module {__name__} has no attribute {name}")
+    return globals()[name]
+# For Pylance
+# These explicit relative imports mirror the lazily exported names so that
+# static analysers can resolve them. The branch only executes when this file
+# is run as __main__; normal package imports go through __getattr__ instead.
+if __name__ == '__main__':
+    from .sparse_structure import SparseStructure
+    from .sparse_feat2render import SparseFeat2Render
+    from .structured_latent2render import (
+        SLat2Render,
+        Slat2RenderGeo,
+    )
+    from .sparse_structure_latent import (
+        SparseStructureLatent,
+        TextConditionedSparseStructureLatent,
+        ImageConditionedSparseStructureLatent,
+    )
+    from .structured_latent import (
+        SLat,
+        TextConditionedSLat,
+        ImageConditionedSLat,
+    )

trellis/datasets/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (1.24 kB). View file

trellis/datasets/__pycache__/components.cpython-310.pyc ADDED Viewed

Binary file (5.46 kB). View file

trellis/datasets/__pycache__/sparse_structure_latent.cpython-310.pyc ADDED Viewed

Binary file (6.94 kB). View file

trellis/datasets/components.py ADDED Viewed

	@@ -0,0 +1,137 @@

+from typing import *
+from abc import abstractmethod
+import os
+import json
+import torch
+import numpy as np
+import pandas as pd
+from PIL import Image
+from torch.utils.data import Dataset
+class StandardDatasetBase(Dataset):
+    """
+    Base class for standard datasets.
+    Args:
+        roots (str): paths to the dataset
+    """
+    def __init__(self,
+        roots: str,
+    ):
+        super().__init__()
+        self.roots = roots.split(',')
+        self.instances = []
+        self.metadata = pd.DataFrame()
+        self._stats = {}
+        for root in self.roots:
+            key = os.path.basename(root)
+            self._stats[key] = {}
+            metadata = pd.read_csv(os.path.join(root, 'metadata.csv'))
+            self._stats[key]['Total'] = len(metadata)
+            metadata, stats = self.filter_metadata(metadata)
+            self._stats[key].update(stats)
+            self.instances.extend([(root, sha256) for sha256 in metadata['sha256'].values])
+            metadata.set_index('sha256', inplace=True)
+            self.metadata = pd.concat([self.metadata, metadata])
+    @abstractmethod
+    def filter_metadata(self, metadata: pd.DataFrame) -> Tuple[pd.DataFrame, Dict[str, int]]:
+        pass
+    @abstractmethod
+    def get_instance(self, root: str, instance: str) -> Dict[str, Any]:
+        pass
+    def __len__(self):
+        return len(self.instances)
+    def __getitem__(self, index) -> Dict[str, Any]:
+        try:
+            root, instance = self.instances[index]
+            return self.get_instance(root, instance)
+        except Exception as e:
+            print(e)
+            return self.__getitem__(np.random.randint(0, len(self)))
+    def __str__(self):
+        lines = []
+        lines.append(self.__class__.__name__)
+        lines.append(f'  - Total instances: {len(self)}')
+        lines.append(f'  - Sources:')
+        for key, stats in self._stats.items():
+            lines.append(f'    - {key}:')
+            for k, v in stats.items():
+                lines.append(f'      - {k}: {v}')
+        return '\n'.join(lines)
+class TextConditionedMixin:
+    def __init__(self, roots, **kwargs):
+        super().__init__(roots, **kwargs)
+        self.captions = {}
+        for instance in self.instances:
+            sha256 = instance[1]
+            self.captions[sha256] = json.loads(self.metadata.loc[sha256]['captions'])
+    def filter_metadata(self, metadata):
+        metadata, stats = super().filter_metadata(metadata)
+        metadata = metadata[metadata['captions'].notna()]
+        stats['With captions'] = len(metadata)
+        return metadata, stats
+    def get_instance(self, root, instance):
+        pack = super().get_instance(root, instance)
+        text = np.random.choice(self.captions[instance])
+        pack['cond'] = text
+        return pack
+class ImageConditionedMixin:
+    def __init__(self, roots, *, image_size=518, **kwargs):
+        self.image_size = image_size
+        super().__init__(roots, **kwargs)
+    def filter_metadata(self, metadata):
+        metadata, stats = super().filter_metadata(metadata)
+        metadata = metadata[metadata[f'cond_rendered']]
+        stats['Cond rendered'] = len(metadata)
+        return metadata, stats
+    def get_instance(self, root, instance):
+        pack = super().get_instance(root, instance)
+        image_root = os.path.join(root, 'renders_cond', instance)
+        with open(os.path.join(image_root, 'transforms.json')) as f:
+            metadata = json.load(f)
+        n_views = len(metadata['frames'])
+        view = np.random.randint(n_views)
+        metadata = metadata['frames'][view]
+        image_path = os.path.join(image_root, metadata['file_path'])
+        image = Image.open(image_path)
+        alpha = np.array(image.getchannel(3))
+        bbox = np.array(alpha).nonzero()
+        bbox = [bbox[1].min(), bbox[0].min(), bbox[1].max(), bbox[0].max()]
+        center = [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2]
+        hsize = max(bbox[2] - bbox[0], bbox[3] - bbox[1]) / 2
+        aug_size_ratio = 1.2
+        aug_hsize = hsize * aug_size_ratio
+        aug_center_offset = [0, 0]
+        aug_center = [center[0] + aug_center_offset[0], center[1] + aug_center_offset[1]]
+        aug_bbox = [int(aug_center[0] - aug_hsize), int(aug_center[1] - aug_hsize), int(aug_center[0] + aug_hsize), int(aug_center[1] + aug_hsize)]
+        image = image.crop(aug_bbox)
+        image = image.resize((self.image_size, self.image_size), Image.Resampling.LANCZOS)
+        alpha = image.getchannel(3)
+        image = image.convert('RGB')
+        image = torch.tensor(np.array(image)).permute(2, 0, 1).float() / 255.0
+        alpha = torch.tensor(np.array(alpha)).float() / 255.0
+        image = image * alpha.unsqueeze(0)
+        pack['cond'] = image
+        return pack

trellis/datasets/sparse_feat2render.py ADDED Viewed

	@@ -0,0 +1,134 @@

+import os
+from PIL import Image
+import json
+import numpy as np
+import pandas as pd
+import torch
+import utils3d.torch
+from ..modules.sparse.basic import SparseTensor
+from .components import StandardDatasetBase
+class SparseFeat2Render(StandardDatasetBase):
+    """
+    SparseFeat2Render dataset.
+    Args:
+        roots (str): paths to the dataset
+        image_size (int): size of the image
+        model (str): model name
+        resolution (int): resolution of the data
+        min_aesthetic_score (float): minimum aesthetic score
+        max_num_voxels (int): maximum number of voxels
+    """
+    def __init__(
+        self,
+        roots: str,
+        image_size: int,
+        model: str = 'dinov2_vitl14_reg',
+        resolution: int = 64,
+        min_aesthetic_score: float = 5.0,
+        max_num_voxels: int = 32768,
+    ):
+        self.image_size = image_size
+        self.model = model
+        self.resolution = resolution
+        self.min_aesthetic_score = min_aesthetic_score
+        self.max_num_voxels = max_num_voxels
+        self.value_range = (0, 1)
+        super().__init__(roots)
+    def filter_metadata(self, metadata):
+        stats = {}
+        metadata = metadata[metadata[f'feature_{self.model}']]
+        stats['With features'] = len(metadata)
+        metadata = metadata[metadata['aesthetic_score'] >= self.min_aesthetic_score]
+        stats[f'Aesthetic score >= {self.min_aesthetic_score}'] = len(metadata)
+        metadata = metadata[metadata['num_voxels'] <= self.max_num_voxels]
+        stats[f'Num voxels <= {self.max_num_voxels}'] = len(metadata)
+        return metadata, stats
+    def _get_image(self, root, instance):
+        with open(os.path.join(root, 'renders', instance, 'transforms.json')) as f:
+            metadata = json.load(f)
+        n_views = len(metadata['frames'])
+        view = np.random.randint(n_views)
+        metadata = metadata['frames'][view]
+        fov = metadata['camera_angle_x']
+        intrinsics = utils3d.torch.intrinsics_from_fov_xy(torch.tensor(fov), torch.tensor(fov))
+        c2w = torch.tensor(metadata['transform_matrix'])
+        c2w[:3, 1:3] *= -1
+        extrinsics = torch.inverse(c2w)
+        image_path = os.path.join(root, 'renders', instance, metadata['file_path'])
+        image = Image.open(image_path)
+        alpha = image.getchannel(3)
+        image = image.convert('RGB')
+        image = image.resize((self.image_size, self.image_size), Image.Resampling.LANCZOS)
+        alpha = alpha.resize((self.image_size, self.image_size), Image.Resampling.LANCZOS)
+        image = torch.tensor(np.array(image)).permute(2, 0, 1).float() / 255.0
+        alpha = torch.tensor(np.array(alpha)).float() / 255.0
+        return {
+            'image': image,
+            'alpha': alpha,
+            'extrinsics': extrinsics,
+            'intrinsics': intrinsics,
+        }
+    def _get_feat(self, root, instance):
+        DATA_RESOLUTION = 64
+        feats_path = os.path.join(root, 'features', self.model, f'{instance}.npz')
+        feats = np.load(feats_path, allow_pickle=True)
+        coords = torch.tensor(feats['indices']).int()
+        feats = torch.tensor(feats['patchtokens']).float()
+        if self.resolution != DATA_RESOLUTION:
+            factor = DATA_RESOLUTION // self.resolution
+            coords = coords // factor
+            coords, idx = coords.unique(return_inverse=True, dim=0)
+            feats = torch.scatter_reduce(
+                torch.zeros(coords.shape[0], feats.shape[1], device=feats.device),
+                dim=0,
+                index=idx.unsqueeze(-1).expand(-1, feats.shape[1]),
+                src=feats,
+                reduce='mean'
+            )
+        return {
+            'coords': coords,
+            'feats': feats,
+        }
+    @torch.no_grad()
+    def visualize_sample(self, sample: dict):
+        return sample['image']
+    @staticmethod
+    def collate_fn(batch):
+        pack = {}
+        coords = []
+        for i, b in enumerate(batch):
+            coords.append(torch.cat([torch.full((b['coords'].shape[0], 1), i, dtype=torch.int32), b['coords']], dim=-1))
+        coords = torch.cat(coords)
+        feats = torch.cat([b['feats'] for b in batch])
+        pack['feats'] = SparseTensor(
+            coords=coords,
+            feats=feats,
+        )
+        pack['image'] = torch.stack([b['image'] for b in batch])
+        pack['alpha'] = torch.stack([b['alpha'] for b in batch])
+        pack['extrinsics'] = torch.stack([b['extrinsics'] for b in batch])
+        pack['intrinsics'] = torch.stack([b['intrinsics'] for b in batch])
+        return pack
+    def get_instance(self, root, instance):
+        image = self._get_image(root, instance)
+        feat = self._get_feat(root, instance)
+        return {
+            **image,
+            **feat,
+        }

trellis/datasets/sparse_structure.py ADDED Viewed

	@@ -0,0 +1,107 @@

+import os
+import json
+from typing import Union
+import numpy as np
+import pandas as pd
+import torch
+from torch.utils.data import Dataset
+import utils3d
+from .components import StandardDatasetBase
+from ..representations.octree import DfsOctree as Octree
+from ..renderers import OctreeRenderer
+class SparseStructure(StandardDatasetBase):
+    """
+    Sparse structure dataset
+    Each sample is a dense occupancy grid built from the instance's voxel
+    positions stored in ``voxels/<sha256>.ply``.
+    Args:
+        roots (str): path to the dataset
+        resolution (int): resolution of the voxel grid
+        min_aesthetic_score (float): minimum aesthetic score of the instances to be included in the dataset
+    """
+    def __init__(self,
+        roots,
+        resolution: int = 64,
+        min_aesthetic_score: float = 5.0,
+    ):
+        # Attributes are set before super().__init__ because the base
+        # constructor calls filter_metadata, which reads them.
+        self.resolution = resolution
+        self.min_aesthetic_score = min_aesthetic_score
+        self.value_range = (0, 1)
+        super().__init__(roots)
+    def filter_metadata(self, metadata):
+        # Keep only voxelized instances whose aesthetic score reaches the
+        # threshold; stats records the row count after each filter.
+        stats = {}
+        metadata = metadata[metadata[f'voxelized']]
+        stats['Voxelized'] = len(metadata)
+        metadata = metadata[metadata['aesthetic_score'] >= self.min_aesthetic_score]
+        stats[f'Aesthetic score >= {self.min_aesthetic_score}'] = len(metadata)
+        return metadata, stats
+    def get_instance(self, root, instance):
+        # Returns {'ss': tensor} where ss is a (1, R, R, R) long grid with 1
+        # at every occupied voxel (R = self.resolution).
+        position = utils3d.io.read_ply(os.path.join(root, 'voxels', f'{instance}.ply'))[0]
+        # Shift by 0.5 and scale to grid indices; assumes the stored positions
+        # lie in [-0.5, 0.5) — TODO confirm against the voxelization step.
+        coords = ((torch.tensor(position) + 0.5) * self.resolution).int().contiguous()
+        ss = torch.zeros(1, self.resolution, self.resolution, self.resolution, dtype=torch.long)
+        ss[:, coords[:, 0], coords[:, 1], coords[:, 2]] = 1
+        return {'ss': ss}
+    @torch.no_grad()
+    def visualize_sample(self, ss: Union[torch.Tensor, dict]):
+        # Renders every grid in the batch from four viewpoints and returns one
+        # 3x1024x1024 image per grid, stacked along dim 0. Accepts either the
+        # occupancy tensor itself or a dict holding it under 'ss'. Requires CUDA.
+        ss = ss if isinstance(ss, torch.Tensor) else ss['ss']
+        renderer = OctreeRenderer()
+        renderer.rendering_options.resolution = 512
+        renderer.rendering_options.near = 0.8
+        renderer.rendering_options.far = 1.6
+        renderer.rendering_options.bg_color = (0, 0, 0)
+        renderer.rendering_options.ssaa = 4
+        renderer.pipe.primitive = 'voxel'
+        # Build camera
+        # Four yaws a quarter turn apart share one random offset in
+        # [-pi/4, pi/4]; each view gets its own random pitch in the same range.
+        yaws = [0, np.pi / 2, np.pi, 3 * np.pi / 2]
+        yaws_offset = np.random.uniform(-np.pi / 4, np.pi / 4)
+        yaws = [y + yaws_offset for y in yaws]
+        pitch = [np.random.uniform(-np.pi / 4, np.pi / 4) for _ in range(4)]
+        exts = []
+        ints = []
+        # NOTE: the loop variable rebinds the name `pitch`; zip() already holds
+        # the original list, so iteration is unaffected.
+        for yaw, pitch in zip(yaws, pitch):
+            # Camera origin: unit direction from yaw/pitch, scaled to distance 2.
+            orig = torch.tensor([
+                np.sin(yaw) * np.cos(pitch),
+                np.cos(yaw) * np.cos(pitch),
+                np.sin(pitch),
+            ]).float().cuda() * 2
+            fov = torch.deg2rad(torch.tensor(30)).cuda()
+            # Look at the origin with +Z as the up vector.
+            extrinsics = utils3d.torch.extrinsics_look_at(orig, torch.tensor([0, 0, 0]).float().cuda(), torch.tensor([0, 0, 1]).float().cuda())
+            intrinsics = utils3d.torch.intrinsics_from_fov_xy(fov, fov)
+            exts.append(extrinsics)
+            ints.append(intrinsics)
+        images = []
+        # Build each representation
+        ss = ss.cuda()
+        for i in range(ss.shape[0]):
+            representation = Octree(
+                depth=10,
+                aabb=[-0.5, -0.5, -0.5, 1, 1, 1],
+                device='cuda',
+                primitive='voxel',
+                sh_degree=0,
+                primitive_config={'solid': True},
+            )
+            # Occupied voxel indices of sample i, normalised by the grid resolution.
+            coords = torch.nonzero(ss[i, 0], as_tuple=False)
+            representation.position = coords.float() / self.resolution
+            representation.depth = torch.full((representation.position.shape[0], 1), int(np.log2(self.resolution)), dtype=torch.uint8, device='cuda')
+            image = torch.zeros(3, 1024, 1024).cuda()
+            # The four 512x512 renders are laid out on a 2x2 grid, row-major.
+            tile = [2, 2]
+            for j, (ext, intr) in enumerate(zip(exts, ints)):
+                # Voxels are coloured by their normalised position.
+                res = renderer.render(representation, ext, intr, colors_overwrite=representation.position)
+                image[:, 512 * (j // tile[1]):512 * (j // tile[1] + 1), 512 * (j % tile[1]):512 * (j % tile[1] + 1)] = res['color']
+            images.append(image)
+        return torch.stack(images)