Update models/loaders/matanyone_loader.py
Browse files
models/loaders/matanyone_loader.py
CHANGED
|
@@ -15,6 +15,7 @@
|
|
| 15 |
- New: Full default cfg from official config.json to fix 'mem_every' issues
|
| 16 |
- Update: Disable memory propagation by setting mem_every=-1, max_mem_frames=0 to fix dim mismatch in fusion
|
| 17 |
- Fix: Merge long_term overrides to preserve keys like count_usage
|
|
|
|
| 18 |
"""
|
| 19 |
from __future__ import annotations
|
| 20 |
import os
|
|
@@ -84,7 +85,7 @@ def _to_bchw(x, device: str, is_mask: bool = False) -> torch.Tensor:
|
|
| 84 |
x = x.float()
|
| 85 |
if x.ndim == 5:
|
| 86 |
x = x[:, 0] # -> 4D
|
| 87 |
-
if x.ndim == 4:
|
| 88 |
if x.shape[-1] in (1, 3, 4) and x.shape[1] not in (1, 3, 4):
|
| 89 |
x = x.permute(0, 3, 1, 2).contiguous()
|
| 90 |
elif x.ndim == 3:
|
|
@@ -213,17 +214,10 @@ def _info(name, v):
|
|
| 213 |
# Precision selection
|
| 214 |
# ---------------------------------------------------------------------------
|
| 215 |
def _choose_precision(device: str) -> Tuple[torch.dtype, bool, Optional[torch.dtype]]:
|
| 216 |
-
"""Pick model weight dtype + autocast dtype (
|
| 217 |
if device != "cuda":
|
| 218 |
return torch.float32, False, None
|
| 219 |
-
|
| 220 |
-
fp16_ok = cc[0] >= 7 # Volta+
|
| 221 |
-
bf16_ok = cc[0] >= 8 and hasattr(torch.cuda, "is_bf16_supported") and torch.cuda.is_bf16_supported() # Ampere+ strict
|
| 222 |
-
if fp16_ok:
|
| 223 |
-
return torch.float16, True, torch.float16 # Prefer fp16 for T4
|
| 224 |
-
if bf16_ok:
|
| 225 |
-
return torch.bfloat16, True, torch.bfloat16
|
| 226 |
-
return torch.float32, False, None
|
| 227 |
|
| 228 |
# ---------------------------------------------------------------------------
|
| 229 |
# Stateful Adapter around InferenceCore
|
|
@@ -543,22 +537,19 @@ def load(self) -> Optional[Any]:
|
|
| 543 |
'use_long_term': False,
|
| 544 |
}
|
| 545 |
cfg.update(overrides)
|
| 546 |
-
# Merge long_term overrides without
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 547 |
if 'long_term' in cfg:
|
| 548 |
-
cfg['long_term'].update({
|
| 549 |
-
'max_mem_frames': 0,
|
| 550 |
-
'min_mem_frames': 0,
|
| 551 |
-
'count_usage': False, # Disable since memory off
|
| 552 |
-
})
|
| 553 |
else:
|
| 554 |
-
cfg['long_term'] = {
|
| 555 |
-
'buffer_tokens': 2000,
|
| 556 |
-
'count_usage': False,
|
| 557 |
-
'max_mem_frames': 0,
|
| 558 |
-
'max_num_tokens': 10000,
|
| 559 |
-
'min_mem_frames': 0,
|
| 560 |
-
'num_prototypes': 128
|
| 561 |
-
}
|
| 562 |
# Convert to EasyDict for dot access
|
| 563 |
cfg = EasyDict(cfg)
|
| 564 |
# Inference core
|
|
|
|
| 15 |
- New: Full default cfg from official config.json to fix 'mem_every' issues
|
| 16 |
- Update: Disable memory propagation by setting mem_every=-1, max_mem_frames=0 to fix dim mismatch in fusion
|
| 17 |
- Fix: Merge long_term overrides to preserve keys like count_usage
|
| 18 |
+
- New: Force fp32 for T4 to avoid fp16 fallback bugs in tensor ops
|
| 19 |
"""
|
| 20 |
from __future__ import annotations
|
| 21 |
import os
|
|
|
|
| 85 |
x = x.float()
|
| 86 |
if x.ndim == 5:
|
| 87 |
x = x[:, 0] # -> 4D
|
| 88 |
+
if x.ndim == 4:
|
| 89 |
if x.shape[-1] in (1, 3, 4) and x.shape[1] not in (1, 3, 4):
|
| 90 |
x = x.permute(0, 3, 1, 2).contiguous()
|
| 91 |
elif x.ndim == 3:
|
|
|
|
| 214 |
# Precision selection
|
| 215 |
# ---------------------------------------------------------------------------
|
| 216 |
def _choose_precision(device: str) -> Tuple[torch.dtype, bool, Optional[torch.dtype]]:
|
| 217 |
+
"""Pick model weight dtype + autocast dtype (fp32 for stability on T4)."""
|
| 218 |
if device != "cuda":
|
| 219 |
return torch.float32, False, None
|
| 220 |
+
return torch.float32, False, None # Force fp32 to avoid fp16 bugs
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 221 |
|
| 222 |
# ---------------------------------------------------------------------------
|
| 223 |
# Stateful Adapter around InferenceCore
|
|
|
|
| 537 |
'use_long_term': False,
|
| 538 |
}
|
| 539 |
cfg.update(overrides)
|
| 540 |
+
# Merge long_term overrides without removing keys
|
| 541 |
+
long_term_defaults = {
|
| 542 |
+
"buffer_tokens": 2000,
|
| 543 |
+
"count_usage": True,
|
| 544 |
+
"max_mem_frames": 0,
|
| 545 |
+
"max_num_tokens": 10000,
|
| 546 |
+
"min_mem_frames": 0,
|
| 547 |
+
"num_prototypes": 128
|
| 548 |
+
}
|
| 549 |
if 'long_term' in cfg:
|
| 550 |
+
cfg['long_term'].update({k: v for k, v in long_term_defaults.items() if k not in cfg['long_term'] or k in ['max_mem_frames', 'min_mem_frames']})
|
|
|
|
|
|
|
|
|
|
|
|
|
| 551 |
else:
|
| 552 |
+
cfg['long_term'] = long_term_defaults
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 553 |
# Convert to EasyDict for dot access
|
| 554 |
cfg = EasyDict(cfg)
|
| 555 |
# Inference core
|