SUPIR

Running on Zero

App Files Files Community

Fabrice-TIERCELIN committed 25 days ago

Commit

d975146

verified ·

1 Parent(s): cb0a691

Upload 13 files

Browse files

Files changed (13) hide show

packages/ltx-core/src/ltx_core/model/transformer/__init__.py +24 -0
packages/ltx-core/src/ltx_core/model/transformer/adaln.py +0 -5
packages/ltx-core/src/ltx_core/model/transformer/attention.py +3 -17
packages/ltx-core/src/ltx_core/model/transformer/feed_forward.py +0 -3
packages/ltx-core/src/ltx_core/model/transformer/gelu_approx.py +0 -3
packages/ltx-core/src/ltx_core/model/transformer/modality.py +6 -3
packages/ltx-core/src/ltx_core/model/transformer/model.py +0 -14
packages/ltx-core/src/ltx_core/model/transformer/model_configurator.py +15 -0
packages/ltx-core/src/ltx_core/model/transformer/rope.py +0 -3
packages/ltx-core/src/ltx_core/model/transformer/text_projection.py +0 -4
packages/ltx-core/src/ltx_core/model/transformer/timestep_embedding.py +0 -5
packages/ltx-core/src/ltx_core/model/transformer/transformer.py +37 -21
packages/ltx-core/src/ltx_core/model/transformer/transformer_args.py +0 -3

packages/ltx-core/src/ltx_core/model/transformer/__init__.py CHANGED Viewed

	@@ -0,0 +1,24 @@

+"""Transformer model components."""
+from ltx_core.model.transformer.modality import Modality
+from ltx_core.model.transformer.model import LTXModel, X0Model
+from ltx_core.model.transformer.model_configurator import (
+    LTXV_MODEL_COMFY_RENAMING_MAP,
+    LTXV_MODEL_COMFY_RENAMING_WITH_TRANSFORMER_LINEAR_DOWNCAST_MAP,
+    UPCAST_DURING_INFERENCE,
+    LTXModelConfigurator,
+    LTXVideoOnlyModelConfigurator,
+    UpcastWithStochasticRounding,
+)
+__all__ = [
+    "LTXV_MODEL_COMFY_RENAMING_MAP",
+    "LTXV_MODEL_COMFY_RENAMING_WITH_TRANSFORMER_LINEAR_DOWNCAST_MAP",
+    "UPCAST_DURING_INFERENCE",
+    "LTXModel",
+    "LTXModelConfigurator",
+    "LTXVideoOnlyModelConfigurator",
+    "Modality",
+    "UpcastWithStochasticRounding",
+    "X0Model",
+]

packages/ltx-core/src/ltx_core/model/transformer/adaln.py CHANGED Viewed

@@ -1,6 +1,3 @@
-# Copyright (c) 2025 Lightricks. All rights reserved.
-# Created by Andrew Kvochko
 from typing import Optional, Tuple
 import torch
@@ -11,9 +8,7 @@ from ltx_core.model.transformer.timestep_embedding import PixArtAlphaCombinedTim
 class AdaLayerNormSingle(torch.nn.Module):
     r"""
     Norm layer adaptive layer norm single (adaLN-single).
     As proposed in PixArt-Alpha (see: https://arxiv.org/abs/2310.00426; Section 2.3).
     Parameters:
         embedding_dim (`int`): The size of each embedding vector.
         use_additional_conditions (`bool`): To use additional conditions for normalization or not.

 from typing import Optional, Tuple
 import torch
 class AdaLayerNormSingle(torch.nn.Module):
     r"""
     Norm layer adaptive layer norm single (adaLN-single).
     As proposed in PixArt-Alpha (see: https://arxiv.org/abs/2310.00426; Section 2.3).
     Parameters:
         embedding_dim (`int`): The size of each embedding vector.
         use_additional_conditions (`bool`): To use additional conditions for normalization or not.

packages/ltx-core/src/ltx_core/model/transformer/attention.py CHANGED Viewed

@@ -1,6 +1,3 @@
-# Copyright (c) 2025 Lightricks. All rights reserved.
-# Created by Andrew Kvochko
 from enum import Enum
 from typing import Protocol
@@ -14,13 +11,8 @@ try:
     from xformers.ops import memory_efficient_attention
 except ImportError:
     memory_efficient_attention = None
-try:
-    # FlashAttention3 and XFormersAttention cannot be used together
-    if memory_efficient_attention is None:
-        import flash_attn_interface
-except ImportError:
-    flash_attn_interface = None
 class AttentionCallable(Protocol):
     def __call__(
@@ -67,7 +59,6 @@ class XFormersAttention(AttentionCallable):
         # xformers expects [B, M, H, K]
         q, k, v = (t.view(b, -1, heads, dim_head) for t in (q, k, v))
-        # LT_INTERNAL: https://github.com/LightricksResearch/ComfyUI/blob/ee2a50cd8fb3544c66f8a3096390c741fff12ae3/comfy/ldm/modules/attention.py#L441-L459
         if mask is not None:
             # add a singleton batch dimension
             if mask.ndim == 2:
@@ -129,14 +120,9 @@ class AttentionFunction(Enum):
     def __call__(
         self, q: torch.Tensor, k: torch.Tensor, v: torch.Tensor, heads: int, mask: torch.Tensor | None = None
     ) -> torch.Tensor:
-        if self is AttentionFunction.PYTORCH:
-            return PytorchAttention()(q, k, v, heads, mask)
-        elif self is AttentionFunction.XFORMERS:
-            return XFormersAttention()(q, k, v, heads, mask)
-        elif self is AttentionFunction.FLASH_ATTENTION_3:
             return FlashAttention3()(q, k, v, heads, mask)
         else:
-            # Default behavior: XFormers if installed else - PyTorch
             return (
                 XFormersAttention()(q, k, v, heads, mask)
                 if memory_efficient_attention is not None

 from enum import Enum
 from typing import Protocol
     from xformers.ops import memory_efficient_attention
 except ImportError:
     memory_efficient_attention = None
+import flash_attn_interface
 class AttentionCallable(Protocol):
     def __call__(
         # xformers expects [B, M, H, K]
         q, k, v = (t.view(b, -1, heads, dim_head) for t in (q, k, v))
         if mask is not None:
             # add a singleton batch dimension
             if mask.ndim == 2:
     def __call__(
         self, q: torch.Tensor, k: torch.Tensor, v: torch.Tensor, heads: int, mask: torch.Tensor | None = None
     ) -> torch.Tensor:
+        if mask is None:
             return FlashAttention3()(q, k, v, heads, mask)
         else:
             return (
                 XFormersAttention()(q, k, v, heads, mask)
                 if memory_efficient_attention is not None

packages/ltx-core/src/ltx_core/model/transformer/feed_forward.py CHANGED Viewed

@@ -1,6 +1,3 @@
-# Copyright (c) 2025 Lightricks. All rights reserved.
-# Created by Andrew Kvochko
 import torch
 from ltx_core.model.transformer.gelu_approx import GELUApprox





1	import torch
2
3	from ltx_core.model.transformer.gelu_approx import GELUApprox

packages/ltx-core/src/ltx_core/model/transformer/gelu_approx.py CHANGED Viewed

@@ -1,6 +1,3 @@
-# Copyright (c) 2025 Lightricks. All rights reserved.
-# Created by Andrew Kvochko
 import torch





1	import torch
2
3

packages/ltx-core/src/ltx_core/model/transformer/modality.py CHANGED Viewed

@@ -1,6 +1,3 @@
-# Copyright (c) 2025 Lightricks. All rights reserved.
-# Created by Andrew Kvochko
 from dataclasses import dataclass
 import torch
@@ -8,6 +5,12 @@ import torch
 @dataclass(frozen=True)
 class Modality:
     latent: (
         torch.Tensor
     )  # Shape: (B, T, D) where B is the batch size, T is the number of tokens, and D is input dimension

 from dataclasses import dataclass
 import torch
 @dataclass(frozen=True)
 class Modality:
+    """
+    Input data for a single modality (video or audio) in the transformer.
+    Bundles the latent tokens, timestep embeddings, positional information,
+    and text conditioning context for processing by the diffusion transformer.
+    """
     latent: (
         torch.Tensor
     )  # Shape: (B, T, D) where B is the batch size, T is the number of tokens, and D is input dimension

packages/ltx-core/src/ltx_core/model/transformer/model.py CHANGED Viewed

@@ -1,7 +1,3 @@
-# Copyright (c) 2025 Lightricks. All rights reserved.
-# Created by Andrew Kvochko
 from enum import Enum
 import torch
@@ -36,7 +32,6 @@ class LTXModelType(Enum):
 class LTXModel(torch.nn.Module):
     """
     LTX model transformer implementation.
     This class implements the transformer blocks for the LTX model.
     """
@@ -315,11 +310,9 @@ class LTXModel(torch.nn.Module):
     def set_gradient_checkpointing(self, enable: bool) -> None:
         """Enable or disable gradient checkpointing for transformer blocks.
         Gradient checkpointing trades compute for memory by recomputing activations
         during the backward pass instead of storing them. This can significantly
         reduce memory usage at the cost of ~20-30% slower training.
         Args:
             enable: Whether to enable gradient checkpointing
         """
@@ -380,7 +373,6 @@ class LTXModel(torch.nn.Module):
     ) -> tuple[torch.Tensor, torch.Tensor]:
         """
         Forward pass for LTX models.
         Returns:
             Processed output tensors
         """
@@ -424,10 +416,6 @@ class LegacyX0Model(torch.nn.Module):
     """
     Legacy X0 model implementation.
     Returns fully denoised output based on the velocities produced by the base model.
-    LT_INTERNAL_BEGIN
-    Applies full sigma when denoising which is mathematically incorrect but in accordance with:
-    https://github.com/LightricksResearch/ComfyUI/blob/cc26711bd34135a3eac782b81f9526c5acfcf94d/comfy/model_sampling.py#L62-L68
-    LT_INTERNAL_END
     """
     def __init__(self, velocity_model: LTXModel):
@@ -443,7 +431,6 @@ class LegacyX0Model(torch.nn.Module):
     ) -> tuple[torch.Tensor | None, torch.Tensor | None]:
         """
         Denoise the video and audio according to the sigma.
         Returns:
             Denoised video and audio
         """
@@ -472,7 +459,6 @@ class X0Model(torch.nn.Module):
     ) -> tuple[torch.Tensor | None, torch.Tensor | None]:
         """
         Denoise the video and audio according to the sigma.
         Returns:
             Denoised video and audio
         """

 from enum import Enum
 import torch
 class LTXModel(torch.nn.Module):
     """
     LTX model transformer implementation.
     This class implements the transformer blocks for the LTX model.
     """
     def set_gradient_checkpointing(self, enable: bool) -> None:
         """Enable or disable gradient checkpointing for transformer blocks.
         Gradient checkpointing trades compute for memory by recomputing activations
         during the backward pass instead of storing them. This can significantly
         reduce memory usage at the cost of ~20-30% slower training.
         Args:
             enable: Whether to enable gradient checkpointing
         """
     ) -> tuple[torch.Tensor, torch.Tensor]:
         """
         Forward pass for LTX models.
         Returns:
             Processed output tensors
         """
     """
     Legacy X0 model implementation.
     Returns fully denoised output based on the velocities produced by the base model.
     """
     def __init__(self, velocity_model: LTXModel):
     ) -> tuple[torch.Tensor | None, torch.Tensor | None]:
         """
         Denoise the video and audio according to the sigma.
         Returns:
             Denoised video and audio
         """
     ) -> tuple[torch.Tensor | None, torch.Tensor | None]:
         """
         Denoise the video and audio according to the sigma.
         Returns:
             Denoised video and audio
         """

packages/ltx-core/src/ltx_core/model/transformer/model_configurator.py CHANGED Viewed

@@ -11,6 +11,11 @@ from ltx_core.utils import check_config_value
 class LTXModelConfigurator(ModelConfigurator[LTXModel]):
     @classmethod
     def from_config(cls: type[LTXModel], config: dict) -> LTXModel:
         config = config.get("transformer", {})
@@ -62,6 +67,11 @@ class LTXModelConfigurator(ModelConfigurator[LTXModel]):
 class LTXVideoOnlyModelConfigurator(ModelConfigurator[LTXModel]):
     @classmethod
     def from_config(cls: type[LTXModel], config: dict) -> LTXModel:
         config = config.get("transformer", {})
@@ -213,6 +223,11 @@ UPCAST_DURING_INFERENCE = ModuleOps(
 class UpcastWithStochasticRounding(ModuleOps):
     def __new__(cls, seed: int = 0):
         return super().__new__(
             cls,

 class LTXModelConfigurator(ModelConfigurator[LTXModel]):
+    """
+    Configurator for LTX model.
+    Used to create an LTX model from a configuration dictionary.
+    """
     @classmethod
     def from_config(cls: type[LTXModel], config: dict) -> LTXModel:
         config = config.get("transformer", {})
 class LTXVideoOnlyModelConfigurator(ModelConfigurator[LTXModel]):
+    """
+    Configurator for LTX video only model.
+    Used to create an LTX video only model from a configuration dictionary.
+    """
     @classmethod
     def from_config(cls: type[LTXModel], config: dict) -> LTXModel:
         config = config.get("transformer", {})
 class UpcastWithStochasticRounding(ModuleOps):
+    """
+    ModuleOps for upcasting the model's float8_e4m3fn weights and biases to the bfloat16 dtype
+    and applying stochastic rounding during linear forward.
+    """
     def __new__(cls, seed: int = 0):
         return super().__new__(
             cls,

packages/ltx-core/src/ltx_core/model/transformer/rope.py CHANGED Viewed

@@ -1,6 +1,3 @@
-# Copyright (c) 2025 Lightricks. All rights reserved.
-# Created by Andrew Kvochko
 import functools
 import math
 from enum import Enum

 import functools
 import math
 from enum import Enum

packages/ltx-core/src/ltx_core/model/transformer/text_projection.py CHANGED Viewed

@@ -1,13 +1,9 @@
-# Copyright (c) 2025 Lightricks. All rights reserved.
-# Created by Andrew Kvochko
 import torch
 class PixArtAlphaTextProjection(torch.nn.Module):
     """
     Projects caption embeddings. Also handles dropout for classifier-free guidance.
     Adapted from https://github.com/PixArt-alpha/PixArt-alpha/blob/master/diffusion/model/nets/PixArt_blocks.py
     """

 import torch
 class PixArtAlphaTextProjection(torch.nn.Module):
     """
     Projects caption embeddings. Also handles dropout for classifier-free guidance.
     Adapted from https://github.com/PixArt-alpha/PixArt-alpha/blob/master/diffusion/model/nets/PixArt_blocks.py
     """

packages/ltx-core/src/ltx_core/model/transformer/timestep_embedding.py CHANGED Viewed

@@ -1,6 +1,3 @@
-# Copyright (c) 2025 Lightricks. All rights reserved.
-# Created by Andrew Kvochko
 import math
 import torch
@@ -16,7 +13,6 @@ def get_timestep_embedding(
 ) -> torch.Tensor:
     """
     This matches the implementation in Denoising Diffusion Probabilistic Models: Create sinusoidal timestep embeddings.
     Args
         timesteps (torch.Tensor):
             a 1-D Tensor of N indices, one per batch element. These may be fractional.
@@ -122,7 +118,6 @@ class Timesteps(torch.nn.Module):
 class PixArtAlphaCombinedTimestepSizeEmbeddings(torch.nn.Module):
     """
     For PixArt-Alpha.
     Reference:
     https://github.com/PixArt-alpha/PixArt-alpha/blob/0f55e922376d8b797edd44d25d0e7464b260dcab/diffusion/model/nets/PixArtMS.py#L164C9-L168C29
     """

 import math
 import torch
 ) -> torch.Tensor:
     """
     This matches the implementation in Denoising Diffusion Probabilistic Models: Create sinusoidal timestep embeddings.
     Args
         timesteps (torch.Tensor):
             a 1-D Tensor of N indices, one per batch element. These may be fractional.
 class PixArtAlphaCombinedTimestepSizeEmbeddings(torch.nn.Module):
     """
     For PixArt-Alpha.
     Reference:
     https://github.com/PixArt-alpha/PixArt-alpha/blob/0f55e922376d8b797edd44d25d0e7464b260dcab/diffusion/model/nets/PixArtMS.py#L164C9-L168C29
     """

packages/ltx-core/src/ltx_core/model/transformer/transformer.py CHANGED Viewed

@@ -1,6 +1,3 @@
-# Copyright (c) 2025 Lightricks. All rights reserved.
-# Created by Andrew Kvochko
 from dataclasses import dataclass, replace
 import torch
@@ -107,16 +104,13 @@ class BasicAVTransformerBlock(torch.nn.Module):
         self.norm_eps = norm_eps
     def get_ada_values(
-        self,
-        scale_shift_table: torch.Tensor,
-        batch_size: int,
-        timestep: torch.Tensor,
-    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
         num_ada_params = scale_shift_table.shape[0]
         ada_values = (
-            scale_shift_table.unsqueeze(0).unsqueeze(0).to(timestep.dtype)
-            + timestep.reshape(batch_size, timestep.shape[1], num_ada_params, -1)
         ).unbind(dim=2)
         return ada_values
@@ -129,14 +123,10 @@ class BasicAVTransformerBlock(torch.nn.Module):
         num_scale_shift_values: int = 4,
     ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
         scale_shift_ada_values = self.get_ada_values(
-            scale_shift_table[:num_scale_shift_values, :],
-            batch_size,
-            scale_shift_timestep,
         )
         gate_ada_values = self.get_ada_values(
-            scale_shift_table[num_scale_shift_values:, :],
-            batch_size,
-            gate_timestep,
         )
         scale_shift_chunks = [t.squeeze(2) for t in scale_shift_ada_values]
@@ -144,7 +134,7 @@ class BasicAVTransformerBlock(torch.nn.Module):
         return (*scale_shift_chunks, *gate_ada_values)
-    def forward(
         self,
         video: TransformerArgs | None,
         audio: TransformerArgs | None,
@@ -164,8 +154,8 @@ class BasicAVTransformerBlock(torch.nn.Module):
         run_v2a = run_ax and (video is not None and video.enabled and vx.numel() > 0)
         if run_vx:
-            vshift_msa, vscale_msa, vgate_msa, vshift_mlp, vscale_mlp, vgate_mlp = self.get_ada_values(
-                self.scale_shift_table, vx.shape[0], video.timesteps
             )
             if not perturbations.all_in_batch(PerturbationType.SKIP_VIDEO_SELF_ATTN, self.idx):
                 norm_vx = rms_norm(vx, eps=self.norm_eps) * (1 + vscale_msa) + vshift_msa
@@ -174,9 +164,11 @@ class BasicAVTransformerBlock(torch.nn.Module):
             vx = vx + self.attn2(rms_norm(vx, eps=self.norm_eps), context=video.context, mask=video.context_mask)
         if run_ax:
-            ashift_msa, ascale_msa, agate_msa, ashift_mlp, ascale_mlp, agate_mlp = self.get_ada_values(
-                self.audio_scale_shift_table, ax.shape[0], audio.timesteps
             )
             if not perturbations.all_in_batch(PerturbationType.SKIP_AUDIO_SELF_ATTN, self.idx):
@@ -186,6 +178,8 @@ class BasicAVTransformerBlock(torch.nn.Module):
             ax = ax + self.audio_attn2(rms_norm(ax, eps=self.norm_eps), context=audio.context, mask=audio.context_mask)
         # Audio - Video cross attention.
         if run_a2v or run_v2a:
             vx_norm3 = rms_norm(vx, eps=self.norm_eps)
@@ -247,12 +241,34 @@ class BasicAVTransformerBlock(torch.nn.Module):
                     * v2a_mask
                 )
         if run_vx:
             vx_scaled = rms_norm(vx, eps=self.norm_eps) * (1 + vscale_mlp) + vshift_mlp
             vx = vx + self.ff(vx_scaled) * vgate_mlp
         if run_ax:
             ax_scaled = rms_norm(ax, eps=self.norm_eps) * (1 + ascale_mlp) + ashift_mlp
             ax = ax + self.audio_ff(ax_scaled) * agate_mlp
         return replace(video, x=vx) if video is not None else None, replace(audio, x=ax) if audio is not None else None

 from dataclasses import dataclass, replace
 import torch
         self.norm_eps = norm_eps
     def get_ada_values(
+        self, scale_shift_table: torch.Tensor, batch_size: int, timestep: torch.Tensor, indices: slice
+    ) -> tuple[torch.Tensor, ...]:
         num_ada_params = scale_shift_table.shape[0]
         ada_values = (
+            scale_shift_table[indices].unsqueeze(0).unsqueeze(0).to(device=timestep.device, dtype=timestep.dtype)
+            + timestep.reshape(batch_size, timestep.shape[1], num_ada_params, -1)[:, :, indices, :]
         ).unbind(dim=2)
         return ada_values
         num_scale_shift_values: int = 4,
     ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
         scale_shift_ada_values = self.get_ada_values(
+            scale_shift_table[:num_scale_shift_values, :], batch_size, scale_shift_timestep, slice(None, None)
         )
         gate_ada_values = self.get_ada_values(
+            scale_shift_table[num_scale_shift_values:, :], batch_size, gate_timestep, slice(None, None)
         )
         scale_shift_chunks = [t.squeeze(2) for t in scale_shift_ada_values]
         return (*scale_shift_chunks, *gate_ada_values)
+    def forward(  # noqa: PLR0915
         self,
         video: TransformerArgs | None,
         audio: TransformerArgs | None,
         run_v2a = run_ax and (video is not None and video.enabled and vx.numel() > 0)
         if run_vx:
+            vshift_msa, vscale_msa, vgate_msa = self.get_ada_values(
+                self.scale_shift_table, vx.shape[0], video.timesteps, slice(0, 3)
             )
             if not perturbations.all_in_batch(PerturbationType.SKIP_VIDEO_SELF_ATTN, self.idx):
                 norm_vx = rms_norm(vx, eps=self.norm_eps) * (1 + vscale_msa) + vshift_msa
             vx = vx + self.attn2(rms_norm(vx, eps=self.norm_eps), context=video.context, mask=video.context_mask)
+            del vshift_msa, vscale_msa, vgate_msa
         if run_ax:
+            ashift_msa, ascale_msa, agate_msa = self.get_ada_values(
+                self.audio_scale_shift_table, ax.shape[0], audio.timesteps, slice(0, 3)
             )
             if not perturbations.all_in_batch(PerturbationType.SKIP_AUDIO_SELF_ATTN, self.idx):
             ax = ax + self.audio_attn2(rms_norm(ax, eps=self.norm_eps), context=audio.context, mask=audio.context_mask)
+            del ashift_msa, ascale_msa, agate_msa
         # Audio - Video cross attention.
         if run_a2v or run_v2a:
             vx_norm3 = rms_norm(vx, eps=self.norm_eps)
                     * v2a_mask
                 )
+            del gate_out_a2v, gate_out_v2a
+            del (
+                scale_ca_video_hidden_states_a2v,
+                shift_ca_video_hidden_states_a2v,
+                scale_ca_audio_hidden_states_a2v,
+                shift_ca_audio_hidden_states_a2v,
+                scale_ca_video_hidden_states_v2a,
+                shift_ca_video_hidden_states_v2a,
+                scale_ca_audio_hidden_states_v2a,
+                shift_ca_audio_hidden_states_v2a,
+            )
         if run_vx:
+            vshift_mlp, vscale_mlp, vgate_mlp = self.get_ada_values(
+                self.scale_shift_table, vx.shape[0], video.timesteps, slice(3, None)
+            )
             vx_scaled = rms_norm(vx, eps=self.norm_eps) * (1 + vscale_mlp) + vshift_mlp
             vx = vx + self.ff(vx_scaled) * vgate_mlp
+            del vshift_mlp, vscale_mlp, vgate_mlp
         if run_ax:
+            ashift_mlp, ascale_mlp, agate_mlp = self.get_ada_values(
+                self.audio_scale_shift_table, ax.shape[0], audio.timesteps, slice(3, None)
+            )
             ax_scaled = rms_norm(ax, eps=self.norm_eps) * (1 + ascale_mlp) + ashift_mlp
             ax = ax + self.audio_ff(ax_scaled) * agate_mlp
+            del ashift_mlp, ascale_mlp, agate_mlp
         return replace(video, x=vx) if video is not None else None, replace(audio, x=ax) if audio is not None else None

packages/ltx-core/src/ltx_core/model/transformer/transformer_args.py CHANGED Viewed

@@ -1,6 +1,3 @@
-# Copyright (c) 2025 Lightricks. All rights reserved.
-# Created by Andrew Kvochko
 from dataclasses import dataclass, replace
 import torch





1	from dataclasses import dataclass, replace
2
3	import torch