Spaces:

phirni
/

next_frame_prediction

Sleeping

App Files Community

phirni committed on Oct 23, 2025

Commit

fbcd42e

verified ·

1 Parent(s): fab12b9

Update inference.py

Browse files

Files changed (1) hide show

inference.py +23 -21

inference.py CHANGED Viewed

@@ -2,32 +2,34 @@ import torch
 from torchvision import transforms
 from PIL import Image
 import numpy as np
-from model import ConvLSTMModel, BetaVAE
 # ===============================================================
-# Config
 # ===============================================================
 SEQUENCE_LENGTH = 10
 DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 # ===============================================================
-# Load Models
 # ===============================================================
 def load_convlstm(path="convlstm_model.pth"):
-    model = ConvLSTMModel()
     checkpoint = torch.load(path, map_location=DEVICE)
     model.load_state_dict(checkpoint)
     model.eval().to(DEVICE)
-    print("✅ ConvLSTM model loaded.")
     return model
 def load_beta_vae(path="beta_vae_model.pth"):
-    model = BetaVAE()
     checkpoint = torch.load(path, map_location=DEVICE)
     model.load_state_dict(checkpoint)
     model.eval().to(DEVICE)
-    print("✅ β-VAE model loaded.")
     return model
@@ -35,7 +37,7 @@ def load_beta_vae(path="beta_vae_model.pth"):
 # Frame Pre/Post Processing
 # ===============================================================
 def preprocess_frame(frame: Image.Image):
-    """Convert PIL image → torch tensor (1,1,H,W) normalized to [0,1]."""
     transform = transforms.Compose([
         transforms.Grayscale(),
         transforms.Resize((64, 64)),
@@ -46,7 +48,7 @@ def preprocess_frame(frame: Image.Image):
 def postprocess_frame(tensor):
-    """Convert torch tensor (1,1,H,W) → PIL image."""
     tensor = tensor.detach().cpu().clamp(0, 1)
     arr = tensor.squeeze().numpy() * 255
     arr = arr.astype(np.uint8)
@@ -54,33 +56,33 @@ def postprocess_frame(tensor):
 # ===============================================================
-# Inference Logic
 # ===============================================================
 @torch.no_grad()
-def predict_next_frame(model, sequence):
     """
     Args:
-        model: ConvLSTMModel
         sequence: tensor (1, T, 1, H, W)
     Returns:
-        PIL.Image
     """
-    model.eval()
     sequence = sequence.to(DEVICE)
-    next_frame = model(sequence)  # (1,1,H,W)
     return postprocess_frame(next_frame)
 @torch.no_grad()
-def reconstruct_frame(model, frame):
     """
     Args:
-        model: BetaVAE
-        frame: tensor (1,1,H,W)
     Returns:
-        PIL.Image
     """
-    model.eval()
     frame = frame.to(DEVICE)
-    recon, mu, logvar = model(frame)
     return postprocess_frame(recon)

 from torchvision import transforms
 from PIL import Image
 import numpy as np
+from model import BetaVAE, ConvLSTM  # your models
+import torch.nn.functional as F
 # ===============================================================
+# Configuration
 # ===============================================================
 SEQUENCE_LENGTH = 10
 DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 # ===============================================================
+# Model Loading
 # ===============================================================
 def load_convlstm(path="convlstm_model.pth"):
+    model = ConvLSTM(input_channels=1, hidden_channels=[64, 64, 64], output_channels=1)
     checkpoint = torch.load(path, map_location=DEVICE)
     model.load_state_dict(checkpoint)
     model.eval().to(DEVICE)
+    print("✅ ConvLSTM model loaded successfully.")
     return model
 def load_beta_vae(path="beta_vae_model.pth"):
+    model = BetaVAE(input_channels=1, latent_dim=64, beta=4.0)
     checkpoint = torch.load(path, map_location=DEVICE)
     model.load_state_dict(checkpoint)
     model.eval().to(DEVICE)
+    print("✅ β-VAE model loaded successfully.")
     return model
 # Frame Pre/Post Processing
 # ===============================================================
 def preprocess_frame(frame: Image.Image):
+    """Convert a PIL image to a normalized tensor (1, 1, 64, 64)."""
     transform = transforms.Compose([
         transforms.Grayscale(),
         transforms.Resize((64, 64)),
 def postprocess_frame(tensor):
+    """Convert tensor (1, 1, H, W) → PIL image."""
     tensor = tensor.detach().cpu().clamp(0, 1)
     arr = tensor.squeeze().numpy() * 255
     arr = arr.astype(np.uint8)
 # ===============================================================
+# Inference Helpers
 # ===============================================================
 @torch.no_grad()
+def predict_next_frame(convlstm_model, sequence):
     """
+    Predict the next frame using the ConvLSTM model.
     Args:
+        convlstm_model: trained ConvLSTM
         sequence: tensor (1, T, 1, H, W)
     Returns:
+        PIL.Image: predicted next frame
     """
     sequence = sequence.to(DEVICE)
+    next_frame = convlstm_model(sequence)
     return postprocess_frame(next_frame)
 @torch.no_grad()
+def reconstruct_frame(beta_vae_model, frame):
     """
+    Reconstruct a single frame using the β-VAE.
     Args:
+        beta_vae_model: trained β-VAE
+        frame: tensor (1, 1, H, W)
     Returns:
+        PIL.Image: reconstructed frame
     """
     frame = frame.to(DEVICE)
+    recon, mu, logvar = beta_vae_model(frame)
     return postprocess_frame(recon)