Spaces:

opsiclear-admin
/

Trellis.2.multiview

Running on Zero

App Files Community

opsiclear-admin committed on Feb 12

Commit

9c08519

verified ·

1 Parent(s): 40e9737

Best-of-N noise selection: probe 3 candidates, pick lowest std

Browse files

Files changed (1) hide show

trellis2/pipelines/trellis2_image_to_3d.py +23 -28

trellis2/pipelines/trellis2_image_to_3d.py CHANGED Viewed

@@ -392,22 +392,20 @@ class Trellis2ImageTo3DPipeline(Pipeline):
             self.models['shape_slat_decoder'].low_vram = False
         return ret
-    def _detect_degenerate_tex(
         self,
         flow_model,
         noise: SparseTensor,
         concat_cond: SparseTensor,
         cond: dict,
         sampler_params: dict,
-        threshold: float = 1.037,
-    ) -> bool:
         """
-        Run 2 probe steps to detect degenerate texture flow trajectory.
         Uses single-image inference (bypassing multi-image patching) so
-        the threshold stays calibrated regardless of fusion mode.
-        Returns True if the trajectory is degenerate (will produce black texture).
         """
         steps = sampler_params.get('steps', 12)
         rescale_t = sampler_params.get('rescale_t', 3.0)
@@ -415,7 +413,7 @@ class Trellis2ImageTo3DPipeline(Pipeline):
         t_seq = rescale_t * t_seq / (1 + (rescale_t - 1) * t_seq)
         t_seq = t_seq.tolist()
-        # Use single-image cond for probe (threshold was calibrated on single-image)
         probe_cond = {}
         for k, v in cond.items():
             probe_cond[k] = v[:1] if torch.is_tensor(v) and v.ndim >= 1 else v
@@ -442,8 +440,7 @@ class Trellis2ImageTo3DPipeline(Pipeline):
             if patched:
                 sampler._inference_model = patched_fn
-        x0_std = out.pred_x_0.feats.std().item()
-        return x0_std > threshold
     def sample_tex_slat(
         self,
@@ -451,22 +448,19 @@ class Trellis2ImageTo3DPipeline(Pipeline):
         flow_model,
         shape_slat: SparseTensor,
         sampler_params: dict = {},
-        max_retries: int = 3,
-        retry_noise_scale: float = 0.5,
     ) -> SparseTensor:
         """
         Sample structured latent with the given conditioning.
-        Includes early detection of degenerate texture flow trajectories
-        (black texture bug). If detected after 2 probe steps, retries with
-        scaled noise.
         Args:
             cond (dict): The conditioning information.
             shape_slat (SparseTensor): The structured latent for shape
             sampler_params (dict): Additional parameters for the sampler.
-            max_retries (int): Max retries on degenerate detection.
-            retry_noise_scale (float): Noise scale factor on retry.
         """
         # Sample structured latent
         std = torch.tensor(self.shape_slat_normalization['std'])[None].to(shape_slat.device)
@@ -479,18 +473,19 @@ class Trellis2ImageTo3DPipeline(Pipeline):
         if self.low_vram:
             flow_model.to(self.device)
-        noise_feats = torch.randn(shape_slat.coords.shape[0], n_noise_feats).to(self.device)
-        for attempt in range(max_retries + 1):
             noise = shape_slat.replace(feats=noise_feats)
-            if self._detect_degenerate_tex(flow_model, noise, shape_slat, cond, sampler_params):
-                if attempt < max_retries:
-                    noise_feats = torch.randn(shape_slat.coords.shape[0], n_noise_feats).to(self.device) * retry_noise_scale
-                    print(f"\033[93m[tex] Degenerate detected, retry {attempt+1}/{max_retries} with noise_scale={retry_noise_scale}\033[0m")
-                    continue
-                else:
-                    print(f"\033[93m[tex] Degenerate detected but retries exhausted, proceeding anyway\033[0m")
-            break
         slat = self.tex_slat_sampler.sample(
             flow_model,

             self.models['shape_slat_decoder'].low_vram = False
         return ret
+    def _probe_tex_noise(
         self,
         flow_model,
         noise: SparseTensor,
         concat_cond: SparseTensor,
         cond: dict,
         sampler_params: dict,
+    ) -> float:
         """
+        Run 2 probe steps and return pred_x0 std as a quality score.
+        Lower std = better trajectory (further from degenerate attractor).
         Uses single-image inference (bypassing multi-image patching) so
+        scores are comparable regardless of fusion mode.
         """
         steps = sampler_params.get('steps', 12)
         rescale_t = sampler_params.get('rescale_t', 3.0)
         t_seq = rescale_t * t_seq / (1 + (rescale_t - 1) * t_seq)
         t_seq = t_seq.tolist()
+        # Use single-image cond for probe (scores calibrated on single-image)
         probe_cond = {}
         for k, v in cond.items():
             probe_cond[k] = v[:1] if torch.is_tensor(v) and v.ndim >= 1 else v
             if patched:
                 sampler._inference_model = patched_fn
+        return out.pred_x_0.feats.std().item()
     def sample_tex_slat(
         self,
         flow_model,
         shape_slat: SparseTensor,
         sampler_params: dict = {},
+        num_candidates: int = 3,
     ) -> SparseTensor:
         """
         Sample structured latent with the given conditioning.
+        Probes multiple noise candidates and selects the one with the
+        lowest pred_x0 std (furthest from degenerate attractor).
         Args:
             cond (dict): The conditioning information.
             shape_slat (SparseTensor): The structured latent for shape
             sampler_params (dict): Additional parameters for the sampler.
+            num_candidates (int): Number of noise candidates to probe.
         """
         # Sample structured latent
         std = torch.tensor(self.shape_slat_normalization['std'])[None].to(shape_slat.device)
         if self.low_vram:
             flow_model.to(self.device)
+        # Probe multiple noise candidates, pick the best
+        best_noise_feats = None
+        best_score = float('inf')
+        for i in range(num_candidates):
+            noise_feats = torch.randn(shape_slat.coords.shape[0], n_noise_feats).to(self.device)
             noise = shape_slat.replace(feats=noise_feats)
+            score = self._probe_tex_noise(flow_model, noise, shape_slat, cond, sampler_params)
+            print(f"\033[93m[tex] Candidate {i+1}/{num_candidates}: pred_x0_std={score:.4f}\033[0m")
+            if score < best_score:
+                best_score = score
+                best_noise_feats = noise_feats
+        noise = shape_slat.replace(feats=best_noise_feats)
+        print(f"\033[93m[tex] Selected candidate with pred_x0_std={best_score:.4f}\033[0m")
         slat = self.tex_slat_sampler.sample(
             flow_model,