Spaces:

prithivMLmods
/

Multimodal-Edge-Node

Running on Zero

App Files Files Community

prithivMLmods committed on Apr 29

Commit

08077e1

verified ·

1 Parent(s): 740cd8f

Update app.py

Browse files

Files changed (1) hide show

app.py +62 -1

app.py CHANGED Viewed

@@ -11,6 +11,67 @@ import spaces
 import numpy as np
 import torch
 # ──────────────────────────────────────────────
 # Paths
 # ──────────────────────────────────────────────
@@ -460,4 +521,4 @@ Checkpoints are auto-downloaded (~50 GB) from `nvidia/Lyra-2.0` on HuggingFace a
 if __name__ == "__main__":
     demo = build_app()
-    demo.launch(css=CSS, ssr_mode=False)

 import numpy as np
 import torch
+# ──────────────────────────────────────────────
+# flash_attn install — must happen before any
+# lyra_2 import that pulls in flash_attn.
+# We pick the pre-built wheel that matches the
+# running torch+CUDA version automatically.
+# ──────────────────────────────────────────────
+def _install_flash_attn():
+    """
+    Install flash-attn from the pre-built wheels hosted on GitHub.
+    Matches the wheel to the running torch + CUDA version so the
+    .so symbols line up — which is exactly what caused the
+    'undefined symbol: _ZN3c104cuda...' error.
+    """
+    try:
+        import flash_attn          # already installed and importable → done
+        return
+    except ImportError:
+        pass
+    import torch, platform
+    torch_ver  = torch.__version__.split("+")[0].replace(".", "")   # e.g. "240"
+    cuda_ver   = torch.version.cuda.replace(".", "")                # e.g. "121"
+    py_ver     = f"cp{sys.version_info.major}{sys.version_info.minor}"  # e.g. "cp310"
+    arch       = platform.machine()                                 # "x86_64"
+    # Official pre-built wheel index from the flash-attn GitHub releases.
+    # Pattern: flash_attn-<fa_ver>+pt<torch>cu<cuda>-<py>-<py>-linux_<arch>.whl
+    # We try the newest FA2 release first then fall back to pip --no-build-isolation.
+    wheel_url = (
+        f"https://github.com/Dao-AILab/flash-attention/releases/download/"
+        f"v2.7.4.post1/"
+        f"flash_attn-2.7.4.post1+pt{torch_ver}cu{cuda_ver}-{py_ver}-{py_ver}"
+        f"-linux_{arch}.whl"
+    )
+    print(f"[Lyra] Installing flash-attn wheel: {wheel_url}")
+    result = subprocess.run(
+        [sys.executable, "-m", "pip", "install", wheel_url, "--no-deps", "-q"],
+        capture_output=True, text=True,
+    )
+    if result.returncode != 0:
+        print(f"[Lyra] Pre-built wheel not found for this env, "
+              f"falling back to pip install flash-attn --no-build-isolation ...")
+        # This compiles from source — slow (~20 min) but always works.
+        subprocess.run(
+            [sys.executable, "-m", "pip", "install", "flash-attn",
+             "--no-build-isolation", "-q"],
+            check=True,
+        )
+    try:
+        import flash_attn
+        print(f"[Lyra] flash_attn {flash_attn.__version__} ready.")
+    except ImportError as e:
+        raise RuntimeError(f"flash_attn install succeeded but import still fails: {e}")
+_install_flash_attn()
 # ──────────────────────────────────────────────
 # Paths
 # ──────────────────────────────────────────────
 if __name__ == "__main__":
+    # Script entry point: build the app, then launch it with the CSS constant.
     demo = build_app()
+    demo.launch(css=CSS)