Video-to-Video
Diffusers
Safetensors
W-Shuoyan committed on
Commit
82bcd1b
·
verified ·
1 Parent(s): 0a8da2e

Upload folder using huggingface_hub

Browse files
prompt_embeddings/empty.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49738b5f634bc7c7ebad8e0ba01bf8c4eb5930b84c38d00f78dc0d5bc0a417cc
3
+ size 1851488
scheduler/scheduler_config.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "CogVideoXDDIMScheduler",
3
+ "_diffusers_version": "0.32.0.dev0",
4
+ "beta_end": 0.012,
5
+ "beta_schedule": "scaled_linear",
6
+ "beta_start": 0.00085,
7
+ "clip_sample": false,
8
+ "clip_sample_range": 1.0,
9
+ "num_train_timesteps": 1000,
10
+ "prediction_type": "v_prediction",
11
+ "rescale_betas_zero_snr": true,
12
+ "sample_max_value": 1.0,
13
+ "set_alpha_to_one": true,
14
+ "snr_shift_scale": 1.0,
15
+ "steps_offset": 0,
16
+ "timestep_spacing": "trailing",
17
+ "trained_betas": null
18
+ }
transformer/config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "CogVideoXTransformer3D_STVSR_Model",
3
+ "_diffusers_version": "0.35.2",
4
+ "activation_fn": "gelu-approximate",
5
+ "attention_bias": true,
6
+ "attention_head_dim": 64,
7
+ "dropout": 0.0,
8
+ "flip_sin_to_cos": true,
9
+ "freq_shift": 0,
10
+ "in_channels": 16,
11
+ "max_text_seq_length": 226,
12
+ "norm_elementwise_affine": true,
13
+ "norm_eps": 1e-05,
14
+ "num_attention_heads": 48,
15
+ "num_layers": 42,
16
+ "ofs_embed_dim": null,
17
+ "out_channels": 16,
18
+ "patch_bias": false,
19
+ "patch_size": 2,
20
+ "patch_size_t": 2,
21
+ "sample_frames": 81,
22
+ "sample_height": 96,
23
+ "sample_width": 170,
24
+ "spatial_interpolation_scale": 1.875,
25
+ "temporal_compression_ratio": 4,
26
+ "temporal_interpolation_scale": 1.0,
27
+ "text_embed_dim": 4096,
28
+ "time_embed_dim": 512,
29
+ "timestep_activation_fn": "silu",
30
+ "use_learned_positional_embeddings": false,
31
+ "use_rotary_positional_embeddings": true
32
+ }
transformer/diffusion_pytorch_model-00001-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1314c04b052a7425c299e1786ac85d9be5b815f0270f35c1845f82cb98406c87
3
+ size 9981874672
transformer/diffusion_pytorch_model-00002-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41e8c2aaf6cd3fa1f0ed044237de9fdf26a115cd100bbf41468f93c837b63ab1
3
+ size 2353601080
transformer/diffusion_pytorch_model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
vae/config.json ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "CogVideoXDecoder3D_STVSR",
3
+ "_diffusers_version": "0.32.0.dev0",
4
+ "act_fn": "silu",
5
+ "block_out_channels": [
6
+ 128,
7
+ 256,
8
+ 256,
9
+ 512
10
+ ],
11
+ "down_block_types": [
12
+ "CogVideoXDownBlock3D",
13
+ "CogVideoXDownBlock3D",
14
+ "CogVideoXDownBlock3D",
15
+ "CogVideoXDownBlock3D"
16
+ ],
17
+ "force_upcast": true,
18
+ "in_channels": 3,
19
+ "latent_channels": 16,
20
+ "latents_mean": null,
21
+ "latents_std": null,
22
+ "layers_per_block": 3,
23
+ "norm_eps": 1e-06,
24
+ "norm_num_groups": 32,
25
+ "out_channels": 3,
26
+ "sample_height": 480,
27
+ "sample_width": 720,
28
+ "scaling_factor": 0.7,
29
+ "shift_factor": null,
30
+ "temporal_compression_ratio": 4,
31
+ "up_block_types": [
32
+ "CogVideoXUpBlock3D",
33
+ "CogVideoXUpBlock3D",
34
+ "CogVideoXUpBlock3D",
35
+ "CogVideoXUpBlock3D"
36
+ ],
37
+ "use_post_quant_conv": false,
38
+ "use_quant_conv": false,
39
+ "invert_scale_latents": true
40
+ }
vae/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:329fe3b7c3c45bf09697de2903cadbc65b8b999f7b612d972484d1061b47aa69
3
+ size 509282718