LTX-2-3

Runtime error

App Files Files Community

StatusReport committed 24 days ago

Commit

ca0ae99

1 Parent(s): a930359

App: fix app startup issues.

Browse files

See https://huggingface.co/spaces/Lightricks/LTX-2-3/discussions/17 for more details.

Files changed (1) hide show

app.py +76 -0

app.py CHANGED Viewed

@@ -64,6 +64,82 @@ try:
 except Exception as e:
     print(f"[ATTN] xformers patch FAILED: {type(e).__name__}: {e}")
 logging.getLogger().setLevel(logging.INFO)
 MAX_SEED = np.iinfo(np.int32).max

 except Exception as e:
     print(f"[ATTN] xformers patch FAILED: {type(e).__name__}: {e}")
+# Disable xformers FA3 dispatch: FA3 kernels are Hopper-only (sm_90a), but
+# xformers' dispatcher gates them on `device_capability >= (9, 0)`, which also
+# matches Blackwell (RTX PRO 6000, the ZeroGPU fleet hardware since 2026-05-12)
+# and crashes at kernel launch with "invalid argument".
+# Best-effort patch: `_set_use_fa3` is an underscore-prefixed (presumably
+# private) xformers helper, so the import or the call may fail on other
+# xformers builds. Any failure is only logged; startup continues either way.
+try:
+    from xformers.ops.fmha import _set_use_fa3
+    _set_use_fa3(False)
+    print("[ATTN] xformers FA3 dispatch disabled (Blackwell-incompatible)")
+except Exception as e:
+    print(f"[ATTN] FA3 disable FAILED: {type(e).__name__}: {e}")
+# FUSE/mmap workaround: SafetensorsStateDictLoader.load uses safetensors.safe_open
+# under the hood, which mmap's the file. On bucket FUSE mounts that triggers a
+# page-fault storm and deadlocks loading. Bypass mmap by parsing the safetensors
+# header ourselves and reading each tensor's bytes directly.
+import json
+import struct
+from ltx_core.loader.primitives import StateDict
+from ltx_core.loader.sft_loader import SafetensorsStateDictLoader
+# Lookup table from the dtype tag stored in a safetensors header entry to the
+# torch dtype used to reinterpret that tensor's raw bytes.
+_SAFETENSORS_DTYPE_MAP = dict(
+    # floating point
+    F64=torch.float64,
+    F32=torch.float32,
+    F16=torch.float16,
+    BF16=torch.bfloat16,
+    F8_E5M2=torch.float8_e5m2,
+    F8_E4M3=torch.float8_e4m3fn,
+    # integers and bool
+    I64=torch.int64,
+    I32=torch.int32,
+    I16=torch.int16,
+    I8=torch.int8,
+    U8=torch.uint8,
+    BOOL=torch.bool,
+)
+def _patched_load(self, path, sd_ops, device=None):
+    """Load safetensors shard(s) into a StateDict without using mmap.
+
+    Each shard is parsed by hand: an 8-byte little-endian header length, a
+    JSON header, then the tensor payload. Every tensor's bytes are fetched
+    with plain seek/read calls instead of going through safetensors.safe_open.
+
+    Args:
+        path: A single shard path, or a list of shard paths.
+        sd_ops: Optional key/value transform. When not None,
+            ``apply_to_key(name)`` gives the key to use (None drops the
+            tensor) and ``apply_to_key_value(key, tensor)`` yields the
+            ``(key, tensor)`` pairs that are stored.
+        device: Device the tensors are moved to; CPU when falsy.
+
+    Returns:
+        StateDict with the loaded tensors, the target device, the summed
+        ``nbytes`` of all stored tensors and the set of dtypes seen.
+
+    Raises:
+        KeyError: A header entry uses a dtype tag that is missing from
+            _SAFETENSORS_DTYPE_MAP.
+        ValueError: A shard ends before a tensor's declared byte range.
+    """
+    sd = {}
+    size = 0
+    dtype = set()
+    device = device or torch.device("cpu")
+    model_paths = path if isinstance(path, list) else [path]
+    for shard_path in model_paths:
+        with open(shard_path, "rb") as f:
+            header_len = struct.unpack("<Q", f.read(8))[0]
+            header = json.loads(f.read(header_len).decode("utf-8"))
+            # data_offsets in the header are relative to the end of the header.
+            data_base = 8 + header_len
+            for name, meta in header.items():
+                if name == "__metadata__":
+                    continue
+                expected_name = name if sd_ops is None else sd_ops.apply_to_key(name)
+                if expected_name is None:
+                    continue
+                torch_dtype = _SAFETENSORS_DTYPE_MAP[meta["dtype"]]
+                start, end = meta["data_offsets"]
+                nbytes = end - start
+                if nbytes == 0:
+                    # torch.frombuffer refuses an empty buffer, so build the
+                    # zero-element tensor directly. reshape still validates
+                    # that the declared shape really has no elements.
+                    t = torch.empty(0, dtype=torch_dtype).reshape(meta["shape"])
+                else:
+                    # Read straight into the buffer that backs the tensor so
+                    # only one host copy of the payload exists at a time.
+                    buf = bytearray(nbytes)
+                    view = memoryview(buf)
+                    f.seek(data_base + start)
+                    filled = 0
+                    while filled < nbytes:
+                        got = f.readinto(view[filled:])
+                        if not got:
+                            raise ValueError(
+                                f"{shard_path}: tensor {name!r} is truncated "
+                                f"({filled} of {nbytes} bytes read)"
+                            )
+                        filled += got
+                    t = torch.frombuffer(buf, dtype=torch_dtype).reshape(
+                        meta["shape"]
+                    )
+                t = t.to(device=device, non_blocking=True, copy=False)
+                kvs = (
+                    ((expected_name, t),)
+                    if sd_ops is None
+                    else sd_ops.apply_to_key_value(expected_name, t)
+                )
+                for key, v in kvs:
+                    size += v.nbytes
+                    dtype.add(v.dtype)
+                    sd[key] = v
+    return StateDict(sd=sd, device=device, size=size, dtype=dtype)
+# Replace the class-level `load` attribute so later calls made through
+# SafetensorsStateDictLoader go to _patched_load, then log that the patch
+# was installed.
+SafetensorsStateDictLoader.load = _patched_load
+print("[FUSE-PATCH] SafetensorsStateDictLoader.load replaced (chunked-read)")
 logging.getLogger().setLevel(logging.INFO)
 MAX_SEED = np.iinfo(np.int32).max