will702 committed on
Commit
ec4688a
·
1 Parent(s): 49e908a

fix: patch torchmetrics._apply using CPU probe tensor instead of self.device

Browse files

The root cause of 'Torch not compiled with CUDA enabled' is that
torchmetrics.Metric._apply does fn(torch.zeros(1, device=self.device))
where self.device='cuda:0' from the checkpoint — even after map_location
moves all tensors to CPU, because _device is an attribute not a tensor.

Fix: replace the CUDA probe with fn(torch.zeros(1, device='cpu')) so
the destination device is inferred safely without touching CUDA hardware.
Patch is applied before pytorch_forecasting is imported so it is in place
when Lightning restores metric state during load_from_checkpoint.

Files changed (1) hide show
  1. app/models/tft_predictor.py +32 -3
app/models/tft_predictor.py CHANGED
@@ -57,6 +57,31 @@ def _maybe_download(filename: str, local_path: str) -> bool:
57
  return False
58
 
59
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
  def load_model(model_path: str):
61
  """Load and cache the pytorch-forecasting TFT from a Lightning checkpoint."""
62
  global _model, _model_path_cached
@@ -67,11 +92,15 @@ def load_model(model_path: str):
67
  if not os.path.exists(model_path):
68
  raise FileNotFoundError(f"Model checkpoint not found: {model_path}")
69
 
 
 
 
 
70
  from pytorch_forecasting import TemporalFusionTransformer
71
 
72
- # Checkpoint was saved on GPU (Colab). Using a callable map_location moves
73
- # all tensors to CPU AND skips Lightning's model.to(device) call, which is
74
- # what triggers the torchmetrics CUDA assertion on CPU-only servers.
75
  model = TemporalFusionTransformer.load_from_checkpoint(
76
  model_path,
77
  map_location=lambda storage, loc: storage.cpu(),
 
57
  return False
58
 
59
 
60
+ def _patch_torchmetrics_cpu():
61
+ """Patch torchmetrics.Metric._apply to avoid CUDA errors on CPU-only servers.
62
+
63
+ When a GPU-trained checkpoint is loaded on CPU-only hardware, the torchmetrics
64
+ Metric._apply method does `fn(torch.zeros(1, device=self.device))` where
65
+ self.device may still be "cuda:0" from the checkpoint. We replace that with
66
+ a safe CPU probe so the destination device is inferred without touching CUDA.
67
+ """
68
+ try:
69
+ import torchmetrics
70
+ import torch.nn as nn
71
+
72
+ _orig = torchmetrics.Metric._apply
73
+
74
+ def _safe_apply(self, fn):
75
+ # Probe destination device via a CPU tensor — never touches CUDA.
76
+ self._device = fn(torch.zeros(1, device="cpu")).device
77
+ return nn.Module._apply(self, fn)
78
+
79
+ torchmetrics.Metric._apply = _safe_apply
80
+ print("[tft] torchmetrics._apply patched for CPU-only inference")
81
+ except Exception as e:
82
+ print(f"[tft] torchmetrics patch skipped: {e}")
83
+
84
+
85
  def load_model(model_path: str):
86
  """Load and cache the pytorch-forecasting TFT from a Lightning checkpoint."""
87
  global _model, _model_path_cached
 
92
  if not os.path.exists(model_path):
93
  raise FileNotFoundError(f"Model checkpoint not found: {model_path}")
94
 
95
+ # Patch torchmetrics BEFORE importing pytorch_forecasting so the patched
96
+ # _apply is in place when Lightning restores metric state from the checkpoint.
97
+ _patch_torchmetrics_cpu()
98
+
99
  from pytorch_forecasting import TemporalFusionTransformer
100
 
101
+ # Callable map_location: moves all tensors to CPU AND skips Lightning's
102
+ # isinstance(map_location, (str, torch.device)) branch that would call
103
+ # model.to(map_location) which would re-trigger the CUDA error.
104
  model = TemporalFusionTransformer.load_from_checkpoint(
105
  model_path,
106
  map_location=lambda storage, loc: storage.cpu(),