SUPIR

Running on Zero

Fabrice-TIERCELIN committed on 25 days ago

Commit

60ef3c5

verified ·

1 Parent(s): d975146

Upload 7 files

Files changed (7) hide show

packages/ltx-core/src/ltx_core/model/upsampler/__init__.py CHANGED Viewed

+"""Latent upsampler model components."""
+from ltx_core.model.upsampler.model import LatentUpsampler, upsample_video
+from ltx_core.model.upsampler.model_configurator import LatentUpsamplerConfigurator
+__all__ = [
+    "LatentUpsampler",
+    "LatentUpsamplerConfigurator",
+    "upsample_video",
+]

packages/ltx-core/src/ltx_core/model/upsampler/blur_downsample.py CHANGED Viewed

@@ -1,6 +1,3 @@
-# Copyright (c) 2025 Lightricks. All rights reserved.
-# Created by Andrew Kvochko
 import math
 import torch

packages/ltx-core/src/ltx_core/model/upsampler/model.py CHANGED Viewed

@@ -1,19 +1,15 @@
-# Copyright (c) 2025 Lightricks. All rights reserved.
-# Created by Andrew Kvochko
 import torch
 from einops import rearrange
 from ltx_core.model.upsampler.pixel_shuffle import PixelShuffleND
 from ltx_core.model.upsampler.res_block import ResBlock
 from ltx_core.model.upsampler.spatial_rational_resampler import SpatialRationalResampler
 class LatentUpsampler(torch.nn.Module):
     """
-    Model to spatially upsample VAE latents.
     Args:
         in_channels (`int`): Number of channels in the input latent
         mid_channels (`int`): Number of channels in the middle layers
@@ -127,3 +123,20 @@ class LatentUpsampler(torch.nn.Module):
             x = self.final_conv(x)
         return x

 import torch
 from einops import rearrange
 from ltx_core.model.upsampler.pixel_shuffle import PixelShuffleND
 from ltx_core.model.upsampler.res_block import ResBlock
 from ltx_core.model.upsampler.spatial_rational_resampler import SpatialRationalResampler
+from ltx_core.model.video_vae import VideoEncoder
 class LatentUpsampler(torch.nn.Module):
     """
+    Model to upsample VAE latents spatially and/or temporally.
     Args:
         in_channels (`int`): Number of channels in the input latent
         mid_channels (`int`): Number of channels in the middle layers
             x = self.final_conv(x)
         return x
+def upsample_video(latent: torch.Tensor, video_encoder: VideoEncoder, upsampler: "LatentUpsampler") -> torch.Tensor:
+    """
+    Apply upsampling to the latent representation using the provided upsampler.
+    The latent is first un-normalized with the video encoder's per-channel statistics,
+    then passed through the upsampler, and finally normalized again with the same statistics.
+    Args:
+        latent: Input latent tensor of shape [B, C, F, H, W].
+        video_encoder: VideoEncoder with per_channel_statistics for normalization.
+        upsampler: LatentUpsampler module to perform upsampling.
+    Returns:
+        torch.Tensor: Upsampled and re-normalized latent tensor.
+    """
+    # Undo the per-channel normalization before calling the upsampler.
+    # NOTE(review): presumably the incoming latent is already normalized — confirm against callers.
+    latent = video_encoder.per_channel_statistics.un_normalize(latent)
+    latent = upsampler(latent)
+    # Re-apply the same per-channel statistics to the upsampler's output before returning it.
+    latent = video_encoder.per_channel_statistics.normalize(latent)
+    return latent

packages/ltx-core/src/ltx_core/model/upsampler/model_configurator.py CHANGED Viewed

@@ -3,6 +3,11 @@ from ltx_core.model.upsampler.model import LatentUpsampler
 class LatentUpsamplerConfigurator(ModelConfigurator[LatentUpsampler]):
     @classmethod
     def from_config(cls: type[LatentUpsampler], config: dict) -> LatentUpsampler:
         in_channels = config.get("in_channels", 128)

 class LatentUpsamplerConfigurator(ModelConfigurator[LatentUpsampler]):
+    """
+    Configurator for LatentUpsampler model.
+    Used to create a LatentUpsampler model from a configuration dictionary.
+    """
     @classmethod
     def from_config(cls: type[LatentUpsampler], config: dict) -> LatentUpsampler:
         in_channels = config.get("in_channels", 128)

packages/ltx-core/src/ltx_core/model/upsampler/pixel_shuffle.py CHANGED Viewed

@@ -1,6 +1,3 @@
-# Copyright (c) 2025 Lightricks. All rights reserved.
-# Created by Andrew Kvochko
 import torch
 from einops import rearrange
@@ -8,7 +5,6 @@ from einops import rearrange
 class PixelShuffleND(torch.nn.Module):
     """
     N-dimensional pixel shuffle operation for upsampling tensors.
     Args:
         dims (int): Number of dimensions to apply pixel shuffle to.
             - 1: Temporal (e.g., frames)
@@ -18,11 +14,9 @@ class PixelShuffleND(torch.nn.Module):
             For dims=1, only the first value is used.
             For dims=2, the first two values are used.
             For dims=3, all three values are used.
     The input tensor is rearranged so that the channel dimension is split into
     smaller channels and upscaling factors, and the upscaling factors are moved
     into the corresponding spatial/temporal dimensions.
     Note:
     This operation is equivalent to the patchifier operation in the models. Consider
     using this class instead.

 import torch
 from einops import rearrange
 class PixelShuffleND(torch.nn.Module):
     """
     N-dimensional pixel shuffle operation for upsampling tensors.
     Args:
         dims (int): Number of dimensions to apply pixel shuffle to.
             - 1: Temporal (e.g., frames)
             For dims=1, only the first value is used.
             For dims=2, the first two values are used.
             For dims=3, all three values are used.
     The input tensor is rearranged so that the channel dimension is split into
     smaller channels and upscaling factors, and the upscaling factors are moved
     into the corresponding spatial/temporal dimensions.
     Note:
     This operation is equivalent to the patchifier operation in the models. Consider
     using this class instead.

packages/ltx-core/src/ltx_core/model/upsampler/res_block.py CHANGED Viewed

@@ -1,6 +1,3 @@
-# Copyright (c) 2025 Lightricks. All rights reserved.
-# Created by Andrew Kvochko
 from typing import Optional
 import torch

packages/ltx-core/src/ltx_core/model/upsampler/spatial_rational_resampler.py CHANGED Viewed

@@ -1,6 +1,3 @@
-# Copyright (c) 2025 Lightricks. All rights reserved.
-# Created by Andrew Kvochko
 from typing import Tuple
 import torch
@@ -21,9 +18,7 @@ class SpatialRationalResampler(torch.nn.Module):
     """
     Fully-learned rational spatial scaling: up by 'num' via PixelShuffle, then anti-aliased
     downsample by 'den' using fixed blur + stride. Operates on H,W only.
     For dims==3, work per-frame for spatial scaling (temporal axis untouched).
     Args:
         mid_channels (`int`): Number of intermediate channels for the convolution layer
         scale (`float`): Spatial scaling factor. Supported values are:

 from typing import Tuple
 import torch
     """
     Fully-learned rational spatial scaling: up by 'num' via PixelShuffle, then anti-aliased
     downsample by 'den' using fixed blur + stride. Operates on H,W only.
     For dims==3, work per-frame for spatial scaling (temporal axis untouched).
     Args:
         mid_channels (`int`): Number of intermediate channels for the convolution layer
         scale (`float`): Spatial scaling factor. Supported values are: