MogensR committed on
Commit
b91eb11
·
1 Parent(s): a6e89c1

Update models/loaders/matanyone_loader.py

Browse files
Files changed (1) hide show
  1. models/loaders/matanyone_loader.py +11 -4
models/loaders/matanyone_loader.py CHANGED
@@ -15,7 +15,7 @@
15
  - New: Full default cfg from official config.json to fix 'mem_every' issues
16
  - Update: Disable memory propagation by setting mem_every=-1, max_mem_frames=0 to fix dim mismatch in fusion
17
  - Fix: Merge long_term overrides to preserve keys like count_usage
18
- - New: Force fp32 for T4 to avoid fp16 fallback bugs in tensor ops
19
  """
20
  from __future__ import annotations
21
  import os
@@ -85,7 +85,7 @@ def _to_bchw(x, device: str, is_mask: bool = False) -> torch.Tensor:
85
  x = x.float()
86
  if x.ndim == 5:
87
  x = x[:, 0] # -> 4D
88
- if x.ndim = 4:
89
  if x.shape[-1] in (1, 3, 4) and x.shape[1] not in (1, 3, 4):
90
  x = x.permute(0, 3, 1, 2).contiguous()
91
  elif x.ndim == 3:
@@ -214,10 +214,17 @@ def _info(name, v):
214
  # Precision selection
215
  # ---------------------------------------------------------------------------
216
  def _choose_precision(device: str) -> Tuple[torch.dtype, bool, Optional[torch.dtype]]:
217
- """Pick model weight dtype + autocast dtype (fp32 for stability on T4)."""
218
  if device != "cuda":
219
  return torch.float32, False, None
220
- return torch.float32, False, None # Force fp32 to avoid fp16 bugs
 
 
 
 
 
 
 
221
 
222
  # ---------------------------------------------------------------------------
223
  # Stateful Adapter around InferenceCore
 
15
  - New: Full default cfg from official config.json to fix 'mem_every' issues
16
  - Update: Disable memory propagation by setting mem_every=-1, max_mem_frames=0 to fix dim mismatch in fusion
17
  - Fix: Merge long_term overrides to preserve keys like count_usage
18
+ - Fix: Syntax error in _to_bchw (= instead of ==)
19
  """
20
  from __future__ import annotations
21
  import os
 
85
  x = x.float()
86
  if x.ndim == 5:
87
  x = x[:, 0] # -> 4D
88
+ if x.ndim == 4:
89
  if x.shape[-1] in (1, 3, 4) and x.shape[1] not in (1, 3, 4):
90
  x = x.permute(0, 3, 1, 2).contiguous()
91
  elif x.ndim == 3:
 
214
  # Precision selection
215
  # ---------------------------------------------------------------------------
216
  def _choose_precision(device: str) -> Tuple[torch.dtype, bool, Optional[torch.dtype]]:
217
+ """Pick model weight dtype + autocast dtype (fp16>bf16>fp32) for T4 compatibility."""
218
  if device != "cuda":
219
  return torch.float32, False, None
220
+ cc = torch.cuda.get_device_capability() if torch.cuda.is_available() else (0, 0)
221
+ fp16_ok = cc[0] >= 7 # Volta+
222
+ bf16_ok = cc[0] >= 8 and hasattr(torch.cuda, "is_bf16_supported") and torch.cuda.is_bf16_supported() # Ampere+ strict
223
+ if fp16_ok:
224
+ return torch.float16, True, torch.float16 # Prefer fp16 for T4
225
+ if bf16_ok:
226
+ return torch.bfloat16, True, torch.bfloat16
227
+ return torch.float32, False, None
228
 
229
  # ---------------------------------------------------------------------------
230
  # Stateful Adapter around InferenceCore