xiaoanyu123 committed on
Commit
45a975e
·
verified ·
1 Parent(s): 1daf802

Add files using upload-large-folder tool

Browse files
Files changed (20) hide show
  1. pythonProject/.venv/Lib/site-packages/diffusers/models/autoencoders/__init__.py +17 -0
  2. pythonProject/.venv/Lib/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_asym_kl.cpython-310.pyc +0 -0
  3. pythonProject/.venv/Lib/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_dc.cpython-310.pyc +0 -0
  4. pythonProject/.venv/Lib/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl.cpython-310.pyc +0 -0
  5. pythonProject/.venv/Lib/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_allegro.cpython-310.pyc +0 -0
  6. pythonProject/.venv/Lib/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_cogvideox.cpython-310.pyc +0 -0
  7. pythonProject/.venv/Lib/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_cosmos.cpython-310.pyc +0 -0
  8. pythonProject/.venv/Lib/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_hunyuan_video.cpython-310.pyc +0 -0
  9. pythonProject/.venv/Lib/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_ltx.cpython-310.pyc +0 -0
  10. pythonProject/.venv/Lib/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_magvit.cpython-310.pyc +0 -0
  11. pythonProject/.venv/Lib/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_mochi.cpython-310.pyc +0 -0
  12. pythonProject/.venv/Lib/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_qwenimage.cpython-310.pyc +0 -0
  13. pythonProject/.venv/Lib/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_temporal_decoder.cpython-310.pyc +0 -0
  14. pythonProject/.venv/Lib/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_wan.cpython-310.pyc +0 -0
  15. pythonProject/.venv/Lib/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_oobleck.cpython-310.pyc +0 -0
  16. pythonProject/.venv/Lib/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_tiny.cpython-310.pyc +0 -0
  17. pythonProject/.venv/Lib/site-packages/diffusers/models/autoencoders/__pycache__/consistency_decoder_vae.cpython-310.pyc +0 -0
  18. pythonProject/.venv/Lib/site-packages/diffusers/models/autoencoders/__pycache__/vae.cpython-310.pyc +0 -0
  19. pythonProject/.venv/Lib/site-packages/diffusers/models/autoencoders/__pycache__/vq_model.cpython-310.pyc +0 -0
  20. pythonProject/.venv/Lib/site-packages/diffusers/models/autoencoders/vq_model.py +185 -0
pythonProject/.venv/Lib/site-packages/diffusers/models/autoencoders/__init__.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .autoencoder_asym_kl import AsymmetricAutoencoderKL
2
+ from .autoencoder_dc import AutoencoderDC
3
+ from .autoencoder_kl import AutoencoderKL
4
+ from .autoencoder_kl_allegro import AutoencoderKLAllegro
5
+ from .autoencoder_kl_cogvideox import AutoencoderKLCogVideoX
6
+ from .autoencoder_kl_cosmos import AutoencoderKLCosmos
7
+ from .autoencoder_kl_hunyuan_video import AutoencoderKLHunyuanVideo
8
+ from .autoencoder_kl_ltx import AutoencoderKLLTXVideo
9
+ from .autoencoder_kl_magvit import AutoencoderKLMagvit
10
+ from .autoencoder_kl_mochi import AutoencoderKLMochi
11
+ from .autoencoder_kl_qwenimage import AutoencoderKLQwenImage
12
+ from .autoencoder_kl_temporal_decoder import AutoencoderKLTemporalDecoder
13
+ from .autoencoder_kl_wan import AutoencoderKLWan
14
+ from .autoencoder_oobleck import AutoencoderOobleck
15
+ from .autoencoder_tiny import AutoencoderTiny
16
+ from .consistency_decoder_vae import ConsistencyDecoderVAE
17
+ from .vq_model import VQModel
pythonProject/.venv/Lib/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_asym_kl.cpython-310.pyc ADDED
Binary file (6.66 kB). View file
 
pythonProject/.venv/Lib/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_dc.cpython-310.pyc ADDED
Binary file (22.3 kB). View file
 
pythonProject/.venv/Lib/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl.cpython-310.pyc ADDED
Binary file (20.6 kB). View file
 
pythonProject/.venv/Lib/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_allegro.cpython-310.pyc ADDED
Binary file (27.4 kB). View file
 
pythonProject/.venv/Lib/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_cogvideox.cpython-310.pyc ADDED
Binary file (40 kB). View file
 
pythonProject/.venv/Lib/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_cosmos.cpython-310.pyc ADDED
Binary file (35.1 kB). View file
 
pythonProject/.venv/Lib/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_hunyuan_video.cpython-310.pyc ADDED
Binary file (29.3 kB). View file
 
pythonProject/.venv/Lib/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_ltx.cpython-310.pyc ADDED
Binary file (40.9 kB). View file
 
pythonProject/.venv/Lib/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_magvit.cpython-310.pyc ADDED
Binary file (26.8 kB). View file
 
pythonProject/.venv/Lib/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_mochi.cpython-310.pyc ADDED
Binary file (31.9 kB). View file
 
pythonProject/.venv/Lib/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_qwenimage.cpython-310.pyc ADDED
Binary file (31.7 kB). View file
 
pythonProject/.venv/Lib/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_temporal_decoder.cpython-310.pyc ADDED
Binary file (12.4 kB). View file
 
pythonProject/.venv/Lib/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_wan.cpython-310.pyc ADDED
Binary file (37.6 kB). View file
 
pythonProject/.venv/Lib/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_oobleck.cpython-310.pyc ADDED
Binary file (15 kB). View file
 
pythonProject/.venv/Lib/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_tiny.cpython-310.pyc ADDED
Binary file (12.9 kB). View file
 
pythonProject/.venv/Lib/site-packages/diffusers/models/autoencoders/__pycache__/consistency_decoder_vae.cpython-310.pyc ADDED
Binary file (16.4 kB). View file
 
pythonProject/.venv/Lib/site-packages/diffusers/models/autoencoders/__pycache__/vae.cpython-310.pyc ADDED
Binary file (23.5 kB). View file
 
pythonProject/.venv/Lib/site-packages/diffusers/models/autoencoders/__pycache__/vq_model.cpython-310.pyc ADDED
Binary file (6.78 kB). View file
 
pythonProject/.venv/Lib/site-packages/diffusers/models/autoencoders/vq_model.py ADDED
@@ -0,0 +1,185 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ from dataclasses import dataclass
15
+ from typing import Optional, Tuple, Union
16
+
17
+ import torch
18
+ import torch.nn as nn
19
+
20
+ from ...configuration_utils import ConfigMixin, register_to_config
21
+ from ...utils import BaseOutput
22
+ from ...utils.accelerate_utils import apply_forward_hook
23
+ from ..autoencoders.vae import Decoder, DecoderOutput, Encoder, VectorQuantizer
24
+ from ..modeling_utils import ModelMixin
25
+
26
+
27
@dataclass
class VQEncoderOutput(BaseOutput):
    """
    Output of VQModel encoding method.

    Args:
        latents (`torch.Tensor` of shape `(batch_size, num_channels, height, width)`):
            The encoded output sample from the last layer of the model.
    """

    # BaseOutput exposes this both as an attribute and via dict/tuple-style
    # access ("latents" key, index 0).
    latents: torch.Tensor
38
+
39
+
40
class VQModel(ModelMixin, ConfigMixin):
    r"""
    A VQ-VAE model for decoding latent representations.

    This model inherits from [`ModelMixin`]. Check the superclass documentation for its generic methods implemented
    for all models (such as downloading or saving).

    Parameters:
        in_channels (int, *optional*, defaults to 3): Number of channels in the input image.
        out_channels (int, *optional*, defaults to 3): Number of channels in the output.
        down_block_types (`Tuple[str]`, *optional*, defaults to `("DownEncoderBlock2D",)`):
            Tuple of downsample block types.
        up_block_types (`Tuple[str]`, *optional*, defaults to `("UpDecoderBlock2D",)`):
            Tuple of upsample block types.
        block_out_channels (`Tuple[int]`, *optional*, defaults to `(64,)`):
            Tuple of block output channels.
        layers_per_block (`int`, *optional*, defaults to `1`): Number of layers per block.
        act_fn (`str`, *optional*, defaults to `"silu"`): The activation function to use.
        latent_channels (`int`, *optional*, defaults to `3`): Number of channels in the latent space.
        sample_size (`int`, *optional*, defaults to `32`): Sample input size.
        num_vq_embeddings (`int`, *optional*, defaults to `256`): Number of codebook vectors in the VQ-VAE.
        norm_num_groups (`int`, *optional*, defaults to `32`): Number of groups for normalization layers.
        vq_embed_dim (`int`, *optional*): Hidden dim of codebook vectors in the VQ-VAE.
        scaling_factor (`float`, *optional*, defaults to `0.18215`):
            The component-wise standard deviation of the trained latent space computed using the first batch of the
            training set. This is used to scale the latent space to have unit variance when training the diffusion
            model. The latents are scaled with the formula `z = z * scaling_factor` before being passed to the
            diffusion model. When decoding, the latents are scaled back to the original scale with the formula: `z = 1
            / scaling_factor * z`. For more details, refer to sections 4.3.2 and D.1 of the [High-Resolution Image
            Synthesis with Latent Diffusion Models](https://huggingface.co/papers/2112.10752) paper.
        norm_type (`str`, *optional*, defaults to `"group"`):
            Type of normalization layer to use. Can be one of `"group"` or `"spatial"`.
    """

    # Submodules matching these name patterns are excluded from layerwise dtype
    # casting -- presumably to keep the VQ codebook ("quantize") at full
    # precision; mechanism lives in ModelMixin (TODO confirm).
    _skip_layerwise_casting_patterns = ["quantize"]
    # NOTE(review): group offloading is explicitly disabled for this model.
    _supports_group_offloading = False

    @register_to_config
    def __init__(
        self,
        in_channels: int = 3,
        out_channels: int = 3,
        down_block_types: Tuple[str, ...] = ("DownEncoderBlock2D",),
        up_block_types: Tuple[str, ...] = ("UpDecoderBlock2D",),
        block_out_channels: Tuple[int, ...] = (64,),
        layers_per_block: int = 1,
        act_fn: str = "silu",
        latent_channels: int = 3,
        sample_size: int = 32,
        num_vq_embeddings: int = 256,
        norm_num_groups: int = 32,
        vq_embed_dim: Optional[int] = None,
        scaling_factor: float = 0.18215,
        norm_type: str = "group",  # group, spatial
        mid_block_add_attention: bool = True,
        lookup_from_codebook: bool = False,
        force_upcast: bool = False,
    ):
        super().__init__()

        # pass init params to Encoder
        # double_z=False: unlike a KL-VAE, the encoder emits a single tensor
        # (no mean/logvar split) since quantization replaces sampling.
        self.encoder = Encoder(
            in_channels=in_channels,
            out_channels=latent_channels,
            down_block_types=down_block_types,
            block_out_channels=block_out_channels,
            layers_per_block=layers_per_block,
            act_fn=act_fn,
            norm_num_groups=norm_num_groups,
            double_z=False,
            mid_block_add_attention=mid_block_add_attention,
        )

        # Codebook dimension defaults to the latent channel count when not
        # explicitly configured.
        vq_embed_dim = vq_embed_dim if vq_embed_dim is not None else latent_channels

        # 1x1 convs project latents into and out of the codebook dimension.
        self.quant_conv = nn.Conv2d(latent_channels, vq_embed_dim, 1)
        self.quantize = VectorQuantizer(num_vq_embeddings, vq_embed_dim, beta=0.25, remap=None, sane_index_shape=False)
        self.post_quant_conv = nn.Conv2d(vq_embed_dim, latent_channels, 1)

        # pass init params to Decoder
        self.decoder = Decoder(
            in_channels=latent_channels,
            out_channels=out_channels,
            up_block_types=up_block_types,
            block_out_channels=block_out_channels,
            layers_per_block=layers_per_block,
            act_fn=act_fn,
            norm_num_groups=norm_num_groups,
            norm_type=norm_type,
            mid_block_add_attention=mid_block_add_attention,
        )

    @apply_forward_hook
    def encode(self, x: torch.Tensor, return_dict: bool = True) -> VQEncoderOutput:
        """
        Encode a batch of images into (pre-quantization) latents.

        Args:
            x (`torch.Tensor`): Input batch of images.
            return_dict (`bool`, *optional*, defaults to `True`):
                Whether to return a [`~models.autoencoders.vq_model.VQEncoderOutput`] instead of a plain tuple.

        Returns:
            [`~models.autoencoders.vq_model.VQEncoderOutput`] or `tuple`:
                The continuous latents; quantization happens later in [`~VQModel.decode`].
        """
        h = self.encoder(x)
        h = self.quant_conv(h)

        if not return_dict:
            return (h,)

        return VQEncoderOutput(latents=h)

    @apply_forward_hook
    def decode(
        self, h: torch.Tensor, force_not_quantize: bool = False, return_dict: bool = True, shape=None
    ) -> Union[DecoderOutput, torch.Tensor]:
        """
        Decode latents into an image batch, quantizing them first by default.

        Args:
            h (`torch.Tensor`):
                Latents to decode (or codebook indices when `config.lookup_from_codebook` is set and
                `force_not_quantize=True`).
            force_not_quantize (`bool`, *optional*, defaults to `False`):
                Skip the vector-quantization step and decode `h` as-is (or via a direct codebook lookup).
            return_dict (`bool`, *optional*, defaults to `True`):
                Whether to return a [`~models.autoencoders.vae.DecoderOutput`] instead of a plain tuple.
            shape: Target shape forwarded to the codebook lookup; only used in the `lookup_from_codebook` path.

        Returns:
            [`~models.autoencoders.vae.DecoderOutput`] or `tuple`: Decoded sample plus the commitment loss.
        """
        # also go through quantization layer
        if not force_not_quantize:
            quant, commit_loss, _ = self.quantize(h)
        elif self.config.lookup_from_codebook:
            # `h` holds codebook indices; fetch the corresponding embeddings.
            quant = self.quantize.get_codebook_entry(h, shape)
            # No quantization performed, so commitment loss is zero.
            commit_loss = torch.zeros((h.shape[0])).to(h.device, dtype=h.dtype)
        else:
            quant = h
            commit_loss = torch.zeros((h.shape[0])).to(h.device, dtype=h.dtype)
        quant2 = self.post_quant_conv(quant)
        # Spatial norm decoders additionally condition on the quantized latents.
        dec = self.decoder(quant2, quant if self.config.norm_type == "spatial" else None)

        if not return_dict:
            return dec, commit_loss

        return DecoderOutput(sample=dec, commit_loss=commit_loss)

    def forward(
        self, sample: torch.Tensor, return_dict: bool = True
    ) -> Union[DecoderOutput, Tuple[torch.Tensor, ...]]:
        r"""
        The [`VQModel`] forward method.

        Args:
            sample (`torch.Tensor`): Input sample.
            return_dict (`bool`, *optional*, defaults to `True`):
                Whether or not to return a [`models.autoencoders.vq_model.VQEncoderOutput`] instead of a plain tuple.

        Returns:
            [`~models.autoencoders.vq_model.VQEncoderOutput`] or `tuple`:
                If return_dict is True, a [`~models.autoencoders.vq_model.VQEncoderOutput`] is returned, otherwise a
                plain `tuple` is returned.
        """

        # Full round trip: encode -> quantize -> decode.
        h = self.encode(sample).latents
        dec = self.decode(h)

        if not return_dict:
            return dec.sample, dec.commit_loss
        return dec