exx
committed on
Commit
·
c20b869
1
Parent(s):
93517af
TimesFM2 grad issue debug
Browse files- models/TimesFM2.py +81 -63
models/TimesFM2.py
CHANGED
|
@@ -515,81 +515,99 @@ class TimesFM2Core(nn.Module):
|
|
| 515 |
masks: torch.Tensor,
|
| 516 |
) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor | None]:
|
| 517 |
"""Autoregressively decodes a batch of sequences."""
|
| 518 |
-
|
| 519 |
-
|
| 520 |
-
|
| 521 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 522 |
decode_cache_size = num_input_patches + num_decode_steps * self.m
|
| 523 |
-
|
| 524 |
-
patched_inputs = torch.reshape(inputs, (batch_size, -1, self.p))
|
| 525 |
-
patched_masks = torch.reshape(masks, (batch_size, -1, self.p))
|
| 526 |
-
|
| 527 |
-
n = torch.zeros(batch_size, device=inputs.device)
|
| 528 |
-
mu = torch.zeros(batch_size, device=inputs.device)
|
| 529 |
-
sigma = torch.zeros(batch_size, device=inputs.device)
|
| 530 |
-
patch_mu: list[torch.Tensor] = []
|
| 531 |
-
patch_sigma: list[torch.Tensor] = []
|
| 532 |
-
for i in range(num_input_patches):
|
| 533 |
-
(n, mu, sigma), _ = update_running_stats(n, mu, sigma, patched_inputs[:, i], patched_masks[:, i])
|
| 534 |
-
patch_mu.append(mu)
|
| 535 |
-
patch_sigma.append(sigma)
|
| 536 |
-
|
| 537 |
-
last_n, last_mu, last_sigma = n, mu, sigma
|
| 538 |
-
context_mu = torch.stack(patch_mu, dim=1)
|
| 539 |
-
context_sigma = torch.stack(patch_sigma, dim=1)
|
| 540 |
-
|
| 541 |
decode_caches = [
|
| 542 |
DecodeCache(
|
| 543 |
next_index=torch.zeros(batch_size, dtype=torch.int32, device=inputs.device),
|
| 544 |
num_masked=torch.zeros(batch_size, dtype=torch.int32, device=inputs.device),
|
| 545 |
-
key=torch.zeros(
|
| 546 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 547 |
)
|
| 548 |
for _ in range(self.x)
|
| 549 |
]
|
|
|
|
|
|
|
| 550 |
|
| 551 |
-
|
| 552 |
-
|
| 553 |
-
|
| 554 |
|
| 555 |
-
|
| 556 |
-
|
| 557 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 558 |
)
|
| 559 |
-
|
| 560 |
-
|
| 561 |
-
(batch_size, -1, self.os, self.q),
|
| 562 |
-
)[:, -1, ...]
|
| 563 |
-
|
| 564 |
-
ar_outputs: list[torch.Tensor] = []
|
| 565 |
-
last_renormed_output = renormed_outputs[:, -1, :, self.aridx]
|
| 566 |
-
|
| 567 |
-
for _ in range(num_decode_steps):
|
| 568 |
-
new_patched_input = torch.reshape(last_renormed_output, (batch_size, self.m, self.p))
|
| 569 |
-
new_mask = torch.zeros_like(new_patched_input, dtype=torch.bool)
|
| 570 |
-
|
| 571 |
-
n, mu, sigma = last_n, last_mu, last_sigma
|
| 572 |
-
new_mus: list[torch.Tensor] = []
|
| 573 |
-
new_sigmas: list[torch.Tensor] = []
|
| 574 |
-
for i in range(self.m):
|
| 575 |
-
(n, mu, sigma), _ = update_running_stats(n, mu, sigma, new_patched_input[:, i], new_mask[:, i])
|
| 576 |
-
new_mus.append(mu)
|
| 577 |
-
new_sigmas.append(sigma)
|
| 578 |
-
last_n, last_mu, last_sigma = n, mu, sigma
|
| 579 |
-
new_mu = torch.stack(new_mus, dim=1)
|
| 580 |
-
new_sigma = torch.stack(new_sigmas, dim=1)
|
| 581 |
-
|
| 582 |
-
new_normed_input = revin(new_patched_input, new_mu, new_sigma, reverse=False)
|
| 583 |
-
(_, _, new_normed_output, _), decode_caches = self(new_normed_input, new_mask, decode_caches)
|
| 584 |
-
|
| 585 |
-
new_renormed_output = torch.reshape(
|
| 586 |
-
revin(new_normed_output, new_mu, new_sigma, reverse=True),
|
| 587 |
-
(batch_size, self.m, self.o, self.q),
|
| 588 |
-
)
|
| 589 |
-
ar_outputs.append(new_renormed_output[:, -1, ...])
|
| 590 |
-
last_renormed_output = new_renormed_output[:, -1, :, self.aridx]
|
| 591 |
|
| 592 |
-
|
| 593 |
|
| 594 |
return renormed_outputs, renormed_quantile_spread, ar_renormed_outputs
|
| 595 |
|
|
|
|
| 515 |
masks: torch.Tensor,
|
| 516 |
) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor | None]:
|
| 517 |
"""Autoregressively decodes a batch of sequences."""
|
| 518 |
+
batch_size, context = inputs.shape
|
| 519 |
+
num_decode_steps = (horizon - 1) // self.o
|
| 520 |
+
num_input_patches = context // self.p
|
| 521 |
+
use_cache = not torch.is_grad_enabled()
|
| 522 |
+
|
| 523 |
+
patched_inputs = torch.reshape(inputs, (batch_size, -1, self.p))
|
| 524 |
+
patched_masks = torch.reshape(masks, (batch_size, -1, self.p))
|
| 525 |
+
|
| 526 |
+
n = torch.zeros(batch_size, device=inputs.device, dtype=inputs.dtype)
|
| 527 |
+
mu = torch.zeros(batch_size, device=inputs.device, dtype=inputs.dtype)
|
| 528 |
+
sigma = torch.zeros(batch_size, device=inputs.device, dtype=inputs.dtype)
|
| 529 |
+
patch_mu: list[torch.Tensor] = []
|
| 530 |
+
patch_sigma: list[torch.Tensor] = []
|
| 531 |
+
for i in range(num_input_patches):
|
| 532 |
+
(n, mu, sigma), _ = update_running_stats(n, mu, sigma, patched_inputs[:, i], patched_masks[:, i])
|
| 533 |
+
patch_mu.append(mu)
|
| 534 |
+
patch_sigma.append(sigma)
|
| 535 |
+
|
| 536 |
+
last_n, last_mu, last_sigma = n, mu, sigma
|
| 537 |
+
context_mu = torch.stack(patch_mu, dim=1)
|
| 538 |
+
context_sigma = torch.stack(patch_sigma, dim=1)
|
| 539 |
+
|
| 540 |
+
decode_caches: list[DecodeCache] | None
|
| 541 |
+
if use_cache:
|
| 542 |
decode_cache_size = num_input_patches + num_decode_steps * self.m
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 543 |
decode_caches = [
|
| 544 |
DecodeCache(
|
| 545 |
next_index=torch.zeros(batch_size, dtype=torch.int32, device=inputs.device),
|
| 546 |
num_masked=torch.zeros(batch_size, dtype=torch.int32, device=inputs.device),
|
| 547 |
+
key=torch.zeros(
|
| 548 |
+
batch_size,
|
| 549 |
+
decode_cache_size,
|
| 550 |
+
self.h,
|
| 551 |
+
self.hd,
|
| 552 |
+
device=inputs.device,
|
| 553 |
+
dtype=inputs.dtype,
|
| 554 |
+
),
|
| 555 |
+
value=torch.zeros(
|
| 556 |
+
batch_size,
|
| 557 |
+
decode_cache_size,
|
| 558 |
+
self.h,
|
| 559 |
+
self.hd,
|
| 560 |
+
device=inputs.device,
|
| 561 |
+
dtype=inputs.dtype,
|
| 562 |
+
),
|
| 563 |
)
|
| 564 |
for _ in range(self.x)
|
| 565 |
]
|
| 566 |
+
else:
|
| 567 |
+
decode_caches = None
|
| 568 |
|
| 569 |
+
normed_inputs = revin(patched_inputs, context_mu, context_sigma, reverse=False)
|
| 570 |
+
normed_inputs = torch.where(patched_masks, torch.zeros((), device=inputs.device, dtype=inputs.dtype), normed_inputs)
|
| 571 |
+
(_, _, normed_outputs, normed_quantile_spread), decode_caches = self(normed_inputs, patched_masks, decode_caches)
|
| 572 |
|
| 573 |
+
renormed_outputs = torch.reshape(
|
| 574 |
+
revin(normed_outputs, context_mu, context_sigma, reverse=True),
|
| 575 |
+
(batch_size, -1, self.o, self.q),
|
| 576 |
+
)
|
| 577 |
+
renormed_quantile_spread = torch.reshape(
|
| 578 |
+
revin(normed_quantile_spread, context_mu, context_sigma, reverse=True),
|
| 579 |
+
(batch_size, -1, self.os, self.q),
|
| 580 |
+
)[:, -1, ...]
|
| 581 |
+
|
| 582 |
+
ar_outputs: list[torch.Tensor] = []
|
| 583 |
+
last_renormed_output = renormed_outputs[:, -1, :, self.aridx]
|
| 584 |
+
|
| 585 |
+
for _ in range(num_decode_steps):
|
| 586 |
+
new_patched_input = torch.reshape(last_renormed_output, (batch_size, self.m, self.p))
|
| 587 |
+
new_mask = torch.zeros_like(new_patched_input, dtype=torch.bool)
|
| 588 |
+
|
| 589 |
+
n, mu, sigma = last_n, last_mu, last_sigma
|
| 590 |
+
new_mus: list[torch.Tensor] = []
|
| 591 |
+
new_sigmas: list[torch.Tensor] = []
|
| 592 |
+
for i in range(self.m):
|
| 593 |
+
(n, mu, sigma), _ = update_running_stats(n, mu, sigma, new_patched_input[:, i], new_mask[:, i])
|
| 594 |
+
new_mus.append(mu)
|
| 595 |
+
new_sigmas.append(sigma)
|
| 596 |
+
last_n, last_mu, last_sigma = n, mu, sigma
|
| 597 |
+
new_mu = torch.stack(new_mus, dim=1)
|
| 598 |
+
new_sigma = torch.stack(new_sigmas, dim=1)
|
| 599 |
+
|
| 600 |
+
new_normed_input = revin(new_patched_input, new_mu, new_sigma, reverse=False)
|
| 601 |
+
(_, _, new_normed_output, _), decode_caches = self(new_normed_input, new_mask, decode_caches)
|
| 602 |
+
|
| 603 |
+
new_renormed_output = torch.reshape(
|
| 604 |
+
revin(new_normed_output, new_mu, new_sigma, reverse=True),
|
| 605 |
+
(batch_size, self.m, self.o, self.q),
|
| 606 |
)
|
| 607 |
+
ar_outputs.append(new_renormed_output[:, -1, ...])
|
| 608 |
+
last_renormed_output = new_renormed_output[:, -1, :, self.aridx]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 609 |
|
| 610 |
+
ar_renormed_outputs = torch.stack(ar_outputs, dim=1) if num_decode_steps > 0 else None
|
| 611 |
|
| 612 |
return renormed_outputs, renormed_quantile_spread, ar_renormed_outputs
|
| 613 |
|