frank2e2e commited on
Commit
b080d50
·
verified ·
1 Parent(s): f2aab02

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +13 -0
  2. comfyui/audio_encoders/put_audio_encoder_models_here +0 -0
  3. comfyui/checkpoints/put_checkpoints_here +0 -0
  4. comfyui/clip/clip_l.safetensors +3 -0
  5. comfyui/clip/flux_clip_l.safetensors +3 -0
  6. comfyui/clip/flux_t5xxl_fp8_e4m3fn_scaled.safetensors +3 -0
  7. comfyui/clip/put_clip_or_text_encoder_models_here +0 -0
  8. comfyui/clip_vision/clip_vision_h.safetensors +3 -0
  9. comfyui/clip_vision/put_clip_vision_models_here +0 -0
  10. comfyui/configs/anything_v3.yaml +73 -0
  11. comfyui/configs/v1-inference.yaml +70 -0
  12. comfyui/configs/v1-inference_clip_skip_2.yaml +73 -0
  13. comfyui/configs/v1-inference_clip_skip_2_fp16.yaml +74 -0
  14. comfyui/configs/v1-inference_fp16.yaml +71 -0
  15. comfyui/configs/v1-inpainting-inference.yaml +71 -0
  16. comfyui/configs/v2-inference-v.yaml +68 -0
  17. comfyui/configs/v2-inference-v_fp32.yaml +68 -0
  18. comfyui/configs/v2-inference.yaml +67 -0
  19. comfyui/configs/v2-inference_fp32.yaml +67 -0
  20. comfyui/configs/v2-inpainting-inference.yaml +158 -0
  21. comfyui/controlnet/put_controlnets_and_t2i_here +0 -0
  22. comfyui/diffusers/put_diffusers_models_here +0 -0
  23. comfyui/diffusion_models/MelBandRoformer_fp16.safetensors +3 -0
  24. comfyui/diffusion_models/put_diffusion_model_files_here +0 -0
  25. comfyui/diffusion_models/z-image-turbo-fp8-e4m3fn.safetensors +3 -0
  26. comfyui/embeddings/put_embeddings_or_textual_inversion_concepts_here +0 -0
  27. comfyui/face_detection/detection_Resnet50_Final.pth +3 -0
  28. comfyui/face_detection/parsing_bisenet.pth +3 -0
  29. comfyui/face_restoration/codeformer.pth +3 -0
  30. comfyui/gligen/put_gligen_models_here +0 -0
  31. comfyui/hypernetworks/put_hypernetworks_here +0 -0
  32. comfyui/latent_upscale_models/put_latent_upscale_models_here +0 -0
  33. comfyui/loras/FLUX.1-Turbo-Alpha-4steps.safetensors +3 -0
  34. comfyui/loras/Wan2.2-Lightning_I2V-A14B-4steps-lora_HIGH_fp16.safetensors +3 -0
  35. comfyui/loras/Wan2.2-Lightning_I2V-A14B-4steps-lora_LOW_fp16.safetensors +3 -0
  36. comfyui/loras/Wan2.2-Lightning_T2V-v1.1-A14B-4steps-lora_HIGH_fp16.safetensors +3 -0
  37. comfyui/loras/Wan2.2-Lightning_T2V-v1.1-A14B-4steps-lora_LOW_fp16.safetensors +3 -0
  38. comfyui/loras/Wan21_T2V_14B_lightx2v_cfg_step_distill_lora_rank32.safetensors +3 -0
  39. comfyui/loras/lightx2v_I2V_14B_480p_cfg_step_distill_rank64_bf16.safetensors +3 -0
  40. comfyui/loras/put_loras_here +0 -0
  41. comfyui/loras/svi-shot.safetensors +3 -0
  42. comfyui/model_patches/put_model_patches_here +0 -0
  43. comfyui/photomaker/put_photomaker_models_here +0 -0
  44. comfyui/style_models/put_t2i_style_model_here +0 -0
  45. comfyui/text_encoders/put_text_encoder_files_here +0 -0
  46. comfyui/text_encoders/qwen_3_4b.safetensors +3 -0
  47. comfyui/text_encoders/umt5_xxl_fp16.safetensors +3 -0
  48. comfyui/text_encoders/umt5_xxl_fp8_e4m3fn_scaled.safetensors +3 -0
  49. comfyui/transformers/TencentGameMate/chinese-wav2vec2-base/.gitattributes +27 -0
  50. comfyui/transformers/TencentGameMate/chinese-wav2vec2-base/README.md +61 -0
.gitattributes CHANGED
@@ -33,3 +33,16 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ comfyui/unet/Wan2.2-T2V-A14B-HighNoise-Q4_K_S.gguf filter=lfs diff=lfs merge=lfs -text
37
+ comfyui/unet/Wan2.2-T2V-A14B-LowNoise-Q4_K_S.gguf filter=lfs diff=lfs merge=lfs -text
38
+ comfyui/unet/Wan2_1-InfiniteTalk_Multi_Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text
39
+ comfyui/unet/Wan2_1-InfiniteTalk_Single_Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text
40
+ comfyui/unet/flux1-kontext-dev-Q4_K_S.gguf filter=lfs diff=lfs merge=lfs -text
41
+ comfyui/unet/wan2.1-i2v-14b-480p-Q4_K_S.gguf filter=lfs diff=lfs merge=lfs -text
42
+ comfyui/unet/wan2.2_i2v_high_noise_14B_Q4_K_S.gguf filter=lfs diff=lfs merge=lfs -text
43
+ comfyui/unet/wan2.2_i2v_low_noise_14B_Q4_K_S.gguf filter=lfs diff=lfs merge=lfs -text
44
+ index-tts/hf_cache/models--amphion--MaskGCT/blobs/ec947271175d8cad75ec37e83aa487e27c97a0f72a303393772da5ffa84bddf2 filter=lfs diff=lfs merge=lfs -text
45
+ index-tts/hf_cache/models--facebook--w2v-bert-2.0/blobs/eb890c9660ed6e3414b6812e27257b8ce5454365d5490d3ad581ea60b93be043 filter=lfs diff=lfs merge=lfs -text
46
+ index-tts/hf_cache/models--funasr--campplus/blobs/3388cf5fd3493c9ac9c69851d8e7a8badcfb4f3dc631020c4961371646d5ada8 filter=lfs diff=lfs merge=lfs -text
47
+ index-tts/hf_cache/models--nvidia--bigvgan_v2_22khz_80band_256x/blobs/e95ba25972d3de0628d99cd156e9315a9c018899bf739988959ebe3544080ced filter=lfs diff=lfs merge=lfs -text
48
+ index-tts/qwen0.6bemo4-merge/tokenizer.json filter=lfs diff=lfs merge=lfs -text
comfyui/audio_encoders/put_audio_encoder_models_here ADDED
File without changes
comfyui/checkpoints/put_checkpoints_here ADDED
File without changes
comfyui/clip/clip_l.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:660c6f5b1abae9dc498ac2d21e1347d2abdb0cf6c0c0c8576cd796491d9a6cdd
3
+ size 246144152
comfyui/clip/flux_clip_l.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:660c6f5b1abae9dc498ac2d21e1347d2abdb0cf6c0c0c8576cd796491d9a6cdd
3
+ size 246144152
comfyui/clip/flux_t5xxl_fp8_e4m3fn_scaled.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a498f0485dc9536735258018417c3fd7758dc3bccc0a645feaa472b34955557a
3
+ size 5157348688
comfyui/clip/put_clip_or_text_encoder_models_here ADDED
File without changes
comfyui/clip_vision/clip_vision_h.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64a7ef761bfccbadbaa3da77366aac4185a6c58fa5de5f589b42a65bcc21f161
3
+ size 1264219396
comfyui/clip_vision/put_clip_vision_models_here ADDED
File without changes
comfyui/configs/anything_v3.yaml ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ base_learning_rate: 1.0e-04
3
+ target: ldm.models.diffusion.ddpm.LatentDiffusion
4
+ params:
5
+ linear_start: 0.00085
6
+ linear_end: 0.0120
7
+ num_timesteps_cond: 1
8
+ log_every_t: 200
9
+ timesteps: 1000
10
+ first_stage_key: "jpg"
11
+ cond_stage_key: "txt"
12
+ image_size: 64
13
+ channels: 4
14
+ cond_stage_trainable: false # Note: different from the one we trained before
15
+ conditioning_key: crossattn
16
+ monitor: val/loss_simple_ema
17
+ scale_factor: 0.18215
18
+ use_ema: False
19
+
20
+ scheduler_config: # 10000 warmup steps
21
+ target: ldm.lr_scheduler.LambdaLinearScheduler
22
+ params:
23
+ warm_up_steps: [ 10000 ]
24
+ cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
25
+ f_start: [ 1.e-6 ]
26
+ f_max: [ 1. ]
27
+ f_min: [ 1. ]
28
+
29
+ unet_config:
30
+ target: ldm.modules.diffusionmodules.openaimodel.UNetModel
31
+ params:
32
+ image_size: 32 # unused
33
+ in_channels: 4
34
+ out_channels: 4
35
+ model_channels: 320
36
+ attention_resolutions: [ 4, 2, 1 ]
37
+ num_res_blocks: 2
38
+ channel_mult: [ 1, 2, 4, 4 ]
39
+ num_heads: 8
40
+ use_spatial_transformer: True
41
+ transformer_depth: 1
42
+ context_dim: 768
43
+ use_checkpoint: True
44
+ legacy: False
45
+
46
+ first_stage_config:
47
+ target: ldm.models.autoencoder.AutoencoderKL
48
+ params:
49
+ embed_dim: 4
50
+ monitor: val/rec_loss
51
+ ddconfig:
52
+ double_z: true
53
+ z_channels: 4
54
+ resolution: 256
55
+ in_channels: 3
56
+ out_ch: 3
57
+ ch: 128
58
+ ch_mult:
59
+ - 1
60
+ - 2
61
+ - 4
62
+ - 4
63
+ num_res_blocks: 2
64
+ attn_resolutions: []
65
+ dropout: 0.0
66
+ lossconfig:
67
+ target: torch.nn.Identity
68
+
69
+ cond_stage_config:
70
+ target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
71
+ params:
72
+ layer: "hidden"
73
+ layer_idx: -2
comfyui/configs/v1-inference.yaml ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ base_learning_rate: 1.0e-04
3
+ target: ldm.models.diffusion.ddpm.LatentDiffusion
4
+ params:
5
+ linear_start: 0.00085
6
+ linear_end: 0.0120
7
+ num_timesteps_cond: 1
8
+ log_every_t: 200
9
+ timesteps: 1000
10
+ first_stage_key: "jpg"
11
+ cond_stage_key: "txt"
12
+ image_size: 64
13
+ channels: 4
14
+ cond_stage_trainable: false # Note: different from the one we trained before
15
+ conditioning_key: crossattn
16
+ monitor: val/loss_simple_ema
17
+ scale_factor: 0.18215
18
+ use_ema: False
19
+
20
+ scheduler_config: # 10000 warmup steps
21
+ target: ldm.lr_scheduler.LambdaLinearScheduler
22
+ params:
23
+ warm_up_steps: [ 10000 ]
24
+ cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
25
+ f_start: [ 1.e-6 ]
26
+ f_max: [ 1. ]
27
+ f_min: [ 1. ]
28
+
29
+ unet_config:
30
+ target: ldm.modules.diffusionmodules.openaimodel.UNetModel
31
+ params:
32
+ image_size: 32 # unused
33
+ in_channels: 4
34
+ out_channels: 4
35
+ model_channels: 320
36
+ attention_resolutions: [ 4, 2, 1 ]
37
+ num_res_blocks: 2
38
+ channel_mult: [ 1, 2, 4, 4 ]
39
+ num_heads: 8
40
+ use_spatial_transformer: True
41
+ transformer_depth: 1
42
+ context_dim: 768
43
+ use_checkpoint: True
44
+ legacy: False
45
+
46
+ first_stage_config:
47
+ target: ldm.models.autoencoder.AutoencoderKL
48
+ params:
49
+ embed_dim: 4
50
+ monitor: val/rec_loss
51
+ ddconfig:
52
+ double_z: true
53
+ z_channels: 4
54
+ resolution: 256
55
+ in_channels: 3
56
+ out_ch: 3
57
+ ch: 128
58
+ ch_mult:
59
+ - 1
60
+ - 2
61
+ - 4
62
+ - 4
63
+ num_res_blocks: 2
64
+ attn_resolutions: []
65
+ dropout: 0.0
66
+ lossconfig:
67
+ target: torch.nn.Identity
68
+
69
+ cond_stage_config:
70
+ target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
comfyui/configs/v1-inference_clip_skip_2.yaml ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ base_learning_rate: 1.0e-04
3
+ target: ldm.models.diffusion.ddpm.LatentDiffusion
4
+ params:
5
+ linear_start: 0.00085
6
+ linear_end: 0.0120
7
+ num_timesteps_cond: 1
8
+ log_every_t: 200
9
+ timesteps: 1000
10
+ first_stage_key: "jpg"
11
+ cond_stage_key: "txt"
12
+ image_size: 64
13
+ channels: 4
14
+ cond_stage_trainable: false # Note: different from the one we trained before
15
+ conditioning_key: crossattn
16
+ monitor: val/loss_simple_ema
17
+ scale_factor: 0.18215
18
+ use_ema: False
19
+
20
+ scheduler_config: # 10000 warmup steps
21
+ target: ldm.lr_scheduler.LambdaLinearScheduler
22
+ params:
23
+ warm_up_steps: [ 10000 ]
24
+ cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
25
+ f_start: [ 1.e-6 ]
26
+ f_max: [ 1. ]
27
+ f_min: [ 1. ]
28
+
29
+ unet_config:
30
+ target: ldm.modules.diffusionmodules.openaimodel.UNetModel
31
+ params:
32
+ image_size: 32 # unused
33
+ in_channels: 4
34
+ out_channels: 4
35
+ model_channels: 320
36
+ attention_resolutions: [ 4, 2, 1 ]
37
+ num_res_blocks: 2
38
+ channel_mult: [ 1, 2, 4, 4 ]
39
+ num_heads: 8
40
+ use_spatial_transformer: True
41
+ transformer_depth: 1
42
+ context_dim: 768
43
+ use_checkpoint: True
44
+ legacy: False
45
+
46
+ first_stage_config:
47
+ target: ldm.models.autoencoder.AutoencoderKL
48
+ params:
49
+ embed_dim: 4
50
+ monitor: val/rec_loss
51
+ ddconfig:
52
+ double_z: true
53
+ z_channels: 4
54
+ resolution: 256
55
+ in_channels: 3
56
+ out_ch: 3
57
+ ch: 128
58
+ ch_mult:
59
+ - 1
60
+ - 2
61
+ - 4
62
+ - 4
63
+ num_res_blocks: 2
64
+ attn_resolutions: []
65
+ dropout: 0.0
66
+ lossconfig:
67
+ target: torch.nn.Identity
68
+
69
+ cond_stage_config:
70
+ target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
71
+ params:
72
+ layer: "hidden"
73
+ layer_idx: -2
comfyui/configs/v1-inference_clip_skip_2_fp16.yaml ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ base_learning_rate: 1.0e-04
3
+ target: ldm.models.diffusion.ddpm.LatentDiffusion
4
+ params:
5
+ linear_start: 0.00085
6
+ linear_end: 0.0120
7
+ num_timesteps_cond: 1
8
+ log_every_t: 200
9
+ timesteps: 1000
10
+ first_stage_key: "jpg"
11
+ cond_stage_key: "txt"
12
+ image_size: 64
13
+ channels: 4
14
+ cond_stage_trainable: false # Note: different from the one we trained before
15
+ conditioning_key: crossattn
16
+ monitor: val/loss_simple_ema
17
+ scale_factor: 0.18215
18
+ use_ema: False
19
+
20
+ scheduler_config: # 10000 warmup steps
21
+ target: ldm.lr_scheduler.LambdaLinearScheduler
22
+ params:
23
+ warm_up_steps: [ 10000 ]
24
+ cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
25
+ f_start: [ 1.e-6 ]
26
+ f_max: [ 1. ]
27
+ f_min: [ 1. ]
28
+
29
+ unet_config:
30
+ target: ldm.modules.diffusionmodules.openaimodel.UNetModel
31
+ params:
32
+ use_fp16: True
33
+ image_size: 32 # unused
34
+ in_channels: 4
35
+ out_channels: 4
36
+ model_channels: 320
37
+ attention_resolutions: [ 4, 2, 1 ]
38
+ num_res_blocks: 2
39
+ channel_mult: [ 1, 2, 4, 4 ]
40
+ num_heads: 8
41
+ use_spatial_transformer: True
42
+ transformer_depth: 1
43
+ context_dim: 768
44
+ use_checkpoint: True
45
+ legacy: False
46
+
47
+ first_stage_config:
48
+ target: ldm.models.autoencoder.AutoencoderKL
49
+ params:
50
+ embed_dim: 4
51
+ monitor: val/rec_loss
52
+ ddconfig:
53
+ double_z: true
54
+ z_channels: 4
55
+ resolution: 256
56
+ in_channels: 3
57
+ out_ch: 3
58
+ ch: 128
59
+ ch_mult:
60
+ - 1
61
+ - 2
62
+ - 4
63
+ - 4
64
+ num_res_blocks: 2
65
+ attn_resolutions: []
66
+ dropout: 0.0
67
+ lossconfig:
68
+ target: torch.nn.Identity
69
+
70
+ cond_stage_config:
71
+ target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
72
+ params:
73
+ layer: "hidden"
74
+ layer_idx: -2
comfyui/configs/v1-inference_fp16.yaml ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ base_learning_rate: 1.0e-04
3
+ target: ldm.models.diffusion.ddpm.LatentDiffusion
4
+ params:
5
+ linear_start: 0.00085
6
+ linear_end: 0.0120
7
+ num_timesteps_cond: 1
8
+ log_every_t: 200
9
+ timesteps: 1000
10
+ first_stage_key: "jpg"
11
+ cond_stage_key: "txt"
12
+ image_size: 64
13
+ channels: 4
14
+ cond_stage_trainable: false # Note: different from the one we trained before
15
+ conditioning_key: crossattn
16
+ monitor: val/loss_simple_ema
17
+ scale_factor: 0.18215
18
+ use_ema: False
19
+
20
+ scheduler_config: # 10000 warmup steps
21
+ target: ldm.lr_scheduler.LambdaLinearScheduler
22
+ params:
23
+ warm_up_steps: [ 10000 ]
24
+ cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
25
+ f_start: [ 1.e-6 ]
26
+ f_max: [ 1. ]
27
+ f_min: [ 1. ]
28
+
29
+ unet_config:
30
+ target: ldm.modules.diffusionmodules.openaimodel.UNetModel
31
+ params:
32
+ use_fp16: True
33
+ image_size: 32 # unused
34
+ in_channels: 4
35
+ out_channels: 4
36
+ model_channels: 320
37
+ attention_resolutions: [ 4, 2, 1 ]
38
+ num_res_blocks: 2
39
+ channel_mult: [ 1, 2, 4, 4 ]
40
+ num_heads: 8
41
+ use_spatial_transformer: True
42
+ transformer_depth: 1
43
+ context_dim: 768
44
+ use_checkpoint: True
45
+ legacy: False
46
+
47
+ first_stage_config:
48
+ target: ldm.models.autoencoder.AutoencoderKL
49
+ params:
50
+ embed_dim: 4
51
+ monitor: val/rec_loss
52
+ ddconfig:
53
+ double_z: true
54
+ z_channels: 4
55
+ resolution: 256
56
+ in_channels: 3
57
+ out_ch: 3
58
+ ch: 128
59
+ ch_mult:
60
+ - 1
61
+ - 2
62
+ - 4
63
+ - 4
64
+ num_res_blocks: 2
65
+ attn_resolutions: []
66
+ dropout: 0.0
67
+ lossconfig:
68
+ target: torch.nn.Identity
69
+
70
+ cond_stage_config:
71
+ target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
comfyui/configs/v1-inpainting-inference.yaml ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ base_learning_rate: 7.5e-05
3
+ target: ldm.models.diffusion.ddpm.LatentInpaintDiffusion
4
+ params:
5
+ linear_start: 0.00085
6
+ linear_end: 0.0120
7
+ num_timesteps_cond: 1
8
+ log_every_t: 200
9
+ timesteps: 1000
10
+ first_stage_key: "jpg"
11
+ cond_stage_key: "txt"
12
+ image_size: 64
13
+ channels: 4
14
+ cond_stage_trainable: false # Note: different from the one we trained before
15
+ conditioning_key: hybrid # important
16
+ monitor: val/loss_simple_ema
17
+ scale_factor: 0.18215
18
+ finetune_keys: null
19
+
20
+ scheduler_config: # 10000 warmup steps
21
+ target: ldm.lr_scheduler.LambdaLinearScheduler
22
+ params:
23
+ warm_up_steps: [ 2500 ] # NOTE for resuming. use 10000 if starting from scratch
24
+ cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
25
+ f_start: [ 1.e-6 ]
26
+ f_max: [ 1. ]
27
+ f_min: [ 1. ]
28
+
29
+ unet_config:
30
+ target: ldm.modules.diffusionmodules.openaimodel.UNetModel
31
+ params:
32
+ image_size: 32 # unused
33
+ in_channels: 9 # 4 data + 4 downscaled image + 1 mask
34
+ out_channels: 4
35
+ model_channels: 320
36
+ attention_resolutions: [ 4, 2, 1 ]
37
+ num_res_blocks: 2
38
+ channel_mult: [ 1, 2, 4, 4 ]
39
+ num_heads: 8
40
+ use_spatial_transformer: True
41
+ transformer_depth: 1
42
+ context_dim: 768
43
+ use_checkpoint: True
44
+ legacy: False
45
+
46
+ first_stage_config:
47
+ target: ldm.models.autoencoder.AutoencoderKL
48
+ params:
49
+ embed_dim: 4
50
+ monitor: val/rec_loss
51
+ ddconfig:
52
+ double_z: true
53
+ z_channels: 4
54
+ resolution: 256
55
+ in_channels: 3
56
+ out_ch: 3
57
+ ch: 128
58
+ ch_mult:
59
+ - 1
60
+ - 2
61
+ - 4
62
+ - 4
63
+ num_res_blocks: 2
64
+ attn_resolutions: []
65
+ dropout: 0.0
66
+ lossconfig:
67
+ target: torch.nn.Identity
68
+
69
+ cond_stage_config:
70
+ target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
71
+
comfyui/configs/v2-inference-v.yaml ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ base_learning_rate: 1.0e-4
3
+ target: ldm.models.diffusion.ddpm.LatentDiffusion
4
+ params:
5
+ parameterization: "v"
6
+ linear_start: 0.00085
7
+ linear_end: 0.0120
8
+ num_timesteps_cond: 1
9
+ log_every_t: 200
10
+ timesteps: 1000
11
+ first_stage_key: "jpg"
12
+ cond_stage_key: "txt"
13
+ image_size: 64
14
+ channels: 4
15
+ cond_stage_trainable: false
16
+ conditioning_key: crossattn
17
+ monitor: val/loss_simple_ema
18
+ scale_factor: 0.18215
19
+ use_ema: False # we set this to false because this is an inference only config
20
+
21
+ unet_config:
22
+ target: ldm.modules.diffusionmodules.openaimodel.UNetModel
23
+ params:
24
+ use_checkpoint: True
25
+ use_fp16: True
26
+ image_size: 32 # unused
27
+ in_channels: 4
28
+ out_channels: 4
29
+ model_channels: 320
30
+ attention_resolutions: [ 4, 2, 1 ]
31
+ num_res_blocks: 2
32
+ channel_mult: [ 1, 2, 4, 4 ]
33
+ num_head_channels: 64 # need to fix for flash-attn
34
+ use_spatial_transformer: True
35
+ use_linear_in_transformer: True
36
+ transformer_depth: 1
37
+ context_dim: 1024
38
+ legacy: False
39
+
40
+ first_stage_config:
41
+ target: ldm.models.autoencoder.AutoencoderKL
42
+ params:
43
+ embed_dim: 4
44
+ monitor: val/rec_loss
45
+ ddconfig:
46
+ #attn_type: "vanilla-xformers"
47
+ double_z: true
48
+ z_channels: 4
49
+ resolution: 256
50
+ in_channels: 3
51
+ out_ch: 3
52
+ ch: 128
53
+ ch_mult:
54
+ - 1
55
+ - 2
56
+ - 4
57
+ - 4
58
+ num_res_blocks: 2
59
+ attn_resolutions: []
60
+ dropout: 0.0
61
+ lossconfig:
62
+ target: torch.nn.Identity
63
+
64
+ cond_stage_config:
65
+ target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
66
+ params:
67
+ freeze: True
68
+ layer: "penultimate"
comfyui/configs/v2-inference-v_fp32.yaml ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ base_learning_rate: 1.0e-4
3
+ target: ldm.models.diffusion.ddpm.LatentDiffusion
4
+ params:
5
+ parameterization: "v"
6
+ linear_start: 0.00085
7
+ linear_end: 0.0120
8
+ num_timesteps_cond: 1
9
+ log_every_t: 200
10
+ timesteps: 1000
11
+ first_stage_key: "jpg"
12
+ cond_stage_key: "txt"
13
+ image_size: 64
14
+ channels: 4
15
+ cond_stage_trainable: false
16
+ conditioning_key: crossattn
17
+ monitor: val/loss_simple_ema
18
+ scale_factor: 0.18215
19
+ use_ema: False # we set this to false because this is an inference only config
20
+
21
+ unet_config:
22
+ target: ldm.modules.diffusionmodules.openaimodel.UNetModel
23
+ params:
24
+ use_checkpoint: True
25
+ use_fp16: False
26
+ image_size: 32 # unused
27
+ in_channels: 4
28
+ out_channels: 4
29
+ model_channels: 320
30
+ attention_resolutions: [ 4, 2, 1 ]
31
+ num_res_blocks: 2
32
+ channel_mult: [ 1, 2, 4, 4 ]
33
+ num_head_channels: 64 # need to fix for flash-attn
34
+ use_spatial_transformer: True
35
+ use_linear_in_transformer: True
36
+ transformer_depth: 1
37
+ context_dim: 1024
38
+ legacy: False
39
+
40
+ first_stage_config:
41
+ target: ldm.models.autoencoder.AutoencoderKL
42
+ params:
43
+ embed_dim: 4
44
+ monitor: val/rec_loss
45
+ ddconfig:
46
+ #attn_type: "vanilla-xformers"
47
+ double_z: true
48
+ z_channels: 4
49
+ resolution: 256
50
+ in_channels: 3
51
+ out_ch: 3
52
+ ch: 128
53
+ ch_mult:
54
+ - 1
55
+ - 2
56
+ - 4
57
+ - 4
58
+ num_res_blocks: 2
59
+ attn_resolutions: []
60
+ dropout: 0.0
61
+ lossconfig:
62
+ target: torch.nn.Identity
63
+
64
+ cond_stage_config:
65
+ target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
66
+ params:
67
+ freeze: True
68
+ layer: "penultimate"
comfyui/configs/v2-inference.yaml ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ base_learning_rate: 1.0e-4
3
+ target: ldm.models.diffusion.ddpm.LatentDiffusion
4
+ params:
5
+ linear_start: 0.00085
6
+ linear_end: 0.0120
7
+ num_timesteps_cond: 1
8
+ log_every_t: 200
9
+ timesteps: 1000
10
+ first_stage_key: "jpg"
11
+ cond_stage_key: "txt"
12
+ image_size: 64
13
+ channels: 4
14
+ cond_stage_trainable: false
15
+ conditioning_key: crossattn
16
+ monitor: val/loss_simple_ema
17
+ scale_factor: 0.18215
18
+ use_ema: False # we set this to false because this is an inference only config
19
+
20
+ unet_config:
21
+ target: ldm.modules.diffusionmodules.openaimodel.UNetModel
22
+ params:
23
+ use_checkpoint: True
24
+ use_fp16: True
25
+ image_size: 32 # unused
26
+ in_channels: 4
27
+ out_channels: 4
28
+ model_channels: 320
29
+ attention_resolutions: [ 4, 2, 1 ]
30
+ num_res_blocks: 2
31
+ channel_mult: [ 1, 2, 4, 4 ]
32
+ num_head_channels: 64 # need to fix for flash-attn
33
+ use_spatial_transformer: True
34
+ use_linear_in_transformer: True
35
+ transformer_depth: 1
36
+ context_dim: 1024
37
+ legacy: False
38
+
39
+ first_stage_config:
40
+ target: ldm.models.autoencoder.AutoencoderKL
41
+ params:
42
+ embed_dim: 4
43
+ monitor: val/rec_loss
44
+ ddconfig:
45
+ #attn_type: "vanilla-xformers"
46
+ double_z: true
47
+ z_channels: 4
48
+ resolution: 256
49
+ in_channels: 3
50
+ out_ch: 3
51
+ ch: 128
52
+ ch_mult:
53
+ - 1
54
+ - 2
55
+ - 4
56
+ - 4
57
+ num_res_blocks: 2
58
+ attn_resolutions: []
59
+ dropout: 0.0
60
+ lossconfig:
61
+ target: torch.nn.Identity
62
+
63
+ cond_stage_config:
64
+ target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
65
+ params:
66
+ freeze: True
67
+ layer: "penultimate"
comfyui/configs/v2-inference_fp32.yaml ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ base_learning_rate: 1.0e-4
3
+ target: ldm.models.diffusion.ddpm.LatentDiffusion
4
+ params:
5
+ linear_start: 0.00085
6
+ linear_end: 0.0120
7
+ num_timesteps_cond: 1
8
+ log_every_t: 200
9
+ timesteps: 1000
10
+ first_stage_key: "jpg"
11
+ cond_stage_key: "txt"
12
+ image_size: 64
13
+ channels: 4
14
+ cond_stage_trainable: false
15
+ conditioning_key: crossattn
16
+ monitor: val/loss_simple_ema
17
+ scale_factor: 0.18215
18
+ use_ema: False # we set this to false because this is an inference only config
19
+
20
+ unet_config:
21
+ target: ldm.modules.diffusionmodules.openaimodel.UNetModel
22
+ params:
23
+ use_checkpoint: True
24
+ use_fp16: False
25
+ image_size: 32 # unused
26
+ in_channels: 4
27
+ out_channels: 4
28
+ model_channels: 320
29
+ attention_resolutions: [ 4, 2, 1 ]
30
+ num_res_blocks: 2
31
+ channel_mult: [ 1, 2, 4, 4 ]
32
+ num_head_channels: 64 # need to fix for flash-attn
33
+ use_spatial_transformer: True
34
+ use_linear_in_transformer: True
35
+ transformer_depth: 1
36
+ context_dim: 1024
37
+ legacy: False
38
+
39
+ first_stage_config:
40
+ target: ldm.models.autoencoder.AutoencoderKL
41
+ params:
42
+ embed_dim: 4
43
+ monitor: val/rec_loss
44
+ ddconfig:
45
+ #attn_type: "vanilla-xformers"
46
+ double_z: true
47
+ z_channels: 4
48
+ resolution: 256
49
+ in_channels: 3
50
+ out_ch: 3
51
+ ch: 128
52
+ ch_mult:
53
+ - 1
54
+ - 2
55
+ - 4
56
+ - 4
57
+ num_res_blocks: 2
58
+ attn_resolutions: []
59
+ dropout: 0.0
60
+ lossconfig:
61
+ target: torch.nn.Identity
62
+
63
+ cond_stage_config:
64
+ target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
65
+ params:
66
+ freeze: True
67
+ layer: "penultimate"
comfyui/configs/v2-inpainting-inference.yaml ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ base_learning_rate: 5.0e-05
3
+ target: ldm.models.diffusion.ddpm.LatentInpaintDiffusion
4
+ params:
5
+ linear_start: 0.00085
6
+ linear_end: 0.0120
7
+ num_timesteps_cond: 1
8
+ log_every_t: 200
9
+ timesteps: 1000
10
+ first_stage_key: "jpg"
11
+ cond_stage_key: "txt"
12
+ image_size: 64
13
+ channels: 4
14
+ cond_stage_trainable: false
15
+ conditioning_key: hybrid
16
+ scale_factor: 0.18215
17
+ monitor: val/loss_simple_ema
18
+ finetune_keys: null
19
+ use_ema: False
20
+
21
+ unet_config:
22
+ target: ldm.modules.diffusionmodules.openaimodel.UNetModel
23
+ params:
24
+ use_checkpoint: True
25
+ image_size: 32 # unused
26
+ in_channels: 9
27
+ out_channels: 4
28
+ model_channels: 320
29
+ attention_resolutions: [ 4, 2, 1 ]
30
+ num_res_blocks: 2
31
+ channel_mult: [ 1, 2, 4, 4 ]
32
+ num_head_channels: 64 # need to fix for flash-attn
33
+ use_spatial_transformer: True
34
+ use_linear_in_transformer: True
35
+ transformer_depth: 1
36
+ context_dim: 1024
37
+ legacy: False
38
+
39
+ first_stage_config:
40
+ target: ldm.models.autoencoder.AutoencoderKL
41
+ params:
42
+ embed_dim: 4
43
+ monitor: val/rec_loss
44
+ ddconfig:
45
+ #attn_type: "vanilla-xformers"
46
+ double_z: true
47
+ z_channels: 4
48
+ resolution: 256
49
+ in_channels: 3
50
+ out_ch: 3
51
+ ch: 128
52
+ ch_mult:
53
+ - 1
54
+ - 2
55
+ - 4
56
+ - 4
57
+ num_res_blocks: 2
58
+ attn_resolutions: [ ]
59
+ dropout: 0.0
60
+ lossconfig:
61
+ target: torch.nn.Identity
62
+
63
+ cond_stage_config:
64
+ target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
65
+ params:
66
+ freeze: True
67
+ layer: "penultimate"
68
+
69
+
70
+ data:
71
+ target: ldm.data.laion.WebDataModuleFromConfig
72
+ params:
73
+ tar_base: null # for concat as in LAION-A
74
+ p_unsafe_threshold: 0.1
75
+ filter_word_list: "data/filters.yaml"
76
+ max_pwatermark: 0.45
77
+ batch_size: 8
78
+ num_workers: 6
79
+ multinode: True
80
+ min_size: 512
81
+ train:
82
+ shards:
83
+ - "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-0/{00000..18699}.tar -"
84
+ - "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-1/{00000..18699}.tar -"
85
+ - "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-2/{00000..18699}.tar -"
86
+ - "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-3/{00000..18699}.tar -"
87
+ - "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-4/{00000..18699}.tar -" #{00000-94333}.tar"
88
+ shuffle: 10000
89
+ image_key: jpg
90
+ image_transforms:
91
+ - target: torchvision.transforms.Resize
92
+ params:
93
+ size: 512
94
+ interpolation: 3
95
+ - target: torchvision.transforms.RandomCrop
96
+ params:
97
+ size: 512
98
+ postprocess:
99
+ target: ldm.data.laion.AddMask
100
+ params:
101
+ mode: "512train-large"
102
+ p_drop: 0.25
103
+ # NOTE use enough shards to avoid empty validation loops in workers
104
+ validation:
105
+ shards:
106
+ - "pipe:aws s3 cp s3://deep-floyd-s3/datasets/laion_cleaned-part5/{93001..94333}.tar - "
107
+ shuffle: 0
108
+ image_key: jpg
109
+ image_transforms:
110
+ - target: torchvision.transforms.Resize
111
+ params:
112
+ size: 512
113
+ interpolation: 3
114
+ - target: torchvision.transforms.CenterCrop
115
+ params:
116
+ size: 512
117
+ postprocess:
118
+ target: ldm.data.laion.AddMask
119
+ params:
120
+ mode: "512train-large"
121
+ p_drop: 0.25
122
+
123
+ lightning:
124
+ find_unused_parameters: True
125
+ modelcheckpoint:
126
+ params:
127
+ every_n_train_steps: 5000
128
+
129
+ callbacks:
130
+ metrics_over_trainsteps_checkpoint:
131
+ params:
132
+ every_n_train_steps: 10000
133
+
134
+ image_logger:
135
+ target: main.ImageLogger
136
+ params:
137
+ enable_autocast: False
138
+ disabled: False
139
+ batch_frequency: 1000
140
+ max_images: 4
141
+ increase_log_steps: False
142
+ log_first_step: False
143
+ log_images_kwargs:
144
+ use_ema_scope: False
145
+ inpaint: False
146
+ plot_progressive_rows: False
147
+ plot_diffusion_rows: False
148
+ N: 4
149
+ unconditional_guidance_scale: 5.0
150
+ unconditional_guidance_label: [""]
151
+ ddim_steps: 50 # todo check these out for depth2img,
152
+ ddim_eta: 0.0 # todo check these out for depth2img,
153
+
154
+ trainer:
155
+ benchmark: True
156
+ val_check_interval: 5000000
157
+ num_sanity_val_steps: 0
158
+ accumulate_grad_batches: 1
comfyui/controlnet/put_controlnets_and_t2i_here ADDED
File without changes
comfyui/diffusers/put_diffusers_models_here ADDED
File without changes
comfyui/diffusion_models/MelBandRoformer_fp16.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6119aef379a6c7264e0b37db65ae1e6488b8ca4a00baf56d6d244737b8488226
3
+ size 456479072
comfyui/diffusion_models/put_diffusion_model_files_here ADDED
File without changes
comfyui/diffusion_models/z-image-turbo-fp8-e4m3fn.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0825d3cd7588ef1ccc5bd60c427005e407a1e2316beb00f9d58dcae1e9492462
3
+ size 6154974440
comfyui/embeddings/put_embeddings_or_textual_inversion_concepts_here ADDED
File without changes
comfyui/face_detection/detection_Resnet50_Final.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d1de9c2944f2ccddca5f5e010ea5ae64a39845a86311af6fdf30841b0a5a16d
3
+ size 109497761
comfyui/face_detection/parsing_bisenet.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:468e13ca13a9b43cc0881a9f99083a430e9c0a38abd935431d1c28ee94b26567
3
+ size 53289463
comfyui/face_restoration/codeformer.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1009e537e0c2a07d4cabce6355f53cb66767cd4b4297ec7a4a64ca4b8a5684b7
3
+ size 376637898
comfyui/gligen/put_gligen_models_here ADDED
File without changes
comfyui/hypernetworks/put_hypernetworks_here ADDED
File without changes
comfyui/latent_upscale_models/put_latent_upscale_models_here ADDED
File without changes
comfyui/loras/FLUX.1-Turbo-Alpha-4steps.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77f7523a5e9c3da6cfc730c6b07461129fa52997ea06168e9ed5312228aa0bff
3
+ size 694082424
comfyui/loras/Wan2.2-Lightning_I2V-A14B-4steps-lora_HIGH_fp16.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:216cce62ecb5687d55fdc035b1cdbea01d1a9a8100ee924d2155cf1d64f1050b
3
+ size 613561776
comfyui/loras/Wan2.2-Lightning_I2V-A14B-4steps-lora_LOW_fp16.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3d474925042ec908c212c6c4c1e338637e27585c3970253f65b9638a9f5874b
3
+ size 613561776
comfyui/loras/Wan2.2-Lightning_T2V-v1.1-A14B-4steps-lora_HIGH_fp16.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b64ddbc09685b44048d20e4eebdb53bce8aa9198595fdd822a87c256953a2723
3
+ size 613561776
comfyui/loras/Wan2.2-Lightning_T2V-v1.1-A14B-4steps-lora_LOW_fp16.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d184b3eb5ca7992a76aa39026acc6720e2278eedd5c7d1363ccaf530789ec6e
3
+ size 613561776
comfyui/loras/Wan21_T2V_14B_lightx2v_cfg_step_distill_lora_rank32.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:914fafa5dded72e6c470d6f764b97fe4d71567dae7cc01d1181b310efd5c37a6
3
+ size 316822496
comfyui/loras/lightx2v_I2V_14B_480p_cfg_step_distill_rank64_bf16.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85c4a61c30e0497aa44b91d93a893b624708461a56fe5485183b28fa07e2dfb3
3
+ size 738005744
comfyui/loras/put_loras_here ADDED
File without changes
comfyui/loras/svi-shot.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa100103b04b034bb7968b8d61a608b9177a2618277d8e8dd5c40abf8dafb9cb
3
+ size 2453769560
comfyui/model_patches/put_model_patches_here ADDED
File without changes
comfyui/photomaker/put_photomaker_models_here ADDED
File without changes
comfyui/style_models/put_t2i_style_model_here ADDED
File without changes
comfyui/text_encoders/put_text_encoder_files_here ADDED
File without changes
comfyui/text_encoders/qwen_3_4b.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c671498573ac2f7a5501502ccce8d2b08ea6ca2f661c458e708f36b36edfc5a
3
+ size 8044982048
comfyui/text_encoders/umt5_xxl_fp16.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b8850f1961e1cf8a77cca4c964a358d303f490833c6c087d0cff4b2f99db2af
3
+ size 11366399385
comfyui/text_encoders/umt5_xxl_fp8_e4m3fn_scaled.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3355d30191f1f066b26d93fba017ae9809dce6c627dda5f6a66eaa651204f68
3
+ size 6735906897
comfyui/transformers/TencentGameMate/chinese-wav2vec2-base/.gitattributes ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ftz filter=lfs diff=lfs merge=lfs -text
6
+ *.gz filter=lfs diff=lfs merge=lfs -text
7
+ *.h5 filter=lfs diff=lfs merge=lfs -text
8
+ *.joblib filter=lfs diff=lfs merge=lfs -text
9
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
10
+ *.model filter=lfs diff=lfs merge=lfs -text
11
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
12
+ *.onnx filter=lfs diff=lfs merge=lfs -text
13
+ *.ot filter=lfs diff=lfs merge=lfs -text
14
+ *.parquet filter=lfs diff=lfs merge=lfs -text
15
+ *.pb filter=lfs diff=lfs merge=lfs -text
16
+ *.pt filter=lfs diff=lfs merge=lfs -text
17
+ *.pth filter=lfs diff=lfs merge=lfs -text
18
+ *.rar filter=lfs diff=lfs merge=lfs -text
19
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
20
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
21
+ *.tflite filter=lfs diff=lfs merge=lfs -text
22
+ *.tgz filter=lfs diff=lfs merge=lfs -text
23
+ *.wasm filter=lfs diff=lfs merge=lfs -text
24
+ *.xz filter=lfs diff=lfs merge=lfs -text
25
+ *.zip filter=lfs diff=lfs merge=lfs -text
26
+ *.zstandard filter=lfs diff=lfs merge=lfs -text
27
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
comfyui/transformers/TencentGameMate/chinese-wav2vec2-base/README.md ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: mit
3
+ ---
4
+ Pretrained on 10k hours WenetSpeech L subset. More details in [TencentGameMate/chinese_speech_pretrain](https://github.com/TencentGameMate/chinese_speech_pretrain)
5
+
6
+ This model does not have a tokenizer as it was pretrained on audio alone.
7
+ In order to use this model speech recognition, a tokenizer should be created and the model should be fine-tuned on labeled text data.
8
+
9
+ python package:
10
+ transformers==4.16.2
11
+
12
+ ```python
13
+
14
+
15
+ import torch
16
+ import torch.nn.functional as F
17
+ import soundfile as sf
18
+ from fairseq import checkpoint_utils
19
+
20
+ from transformers import (
21
+ Wav2Vec2FeatureExtractor,
22
+ Wav2Vec2ForPreTraining,
23
+ Wav2Vec2Model,
24
+ )
25
+ from transformers.models.wav2vec2.modeling_wav2vec2 import _compute_mask_indices
26
+
27
+ model_path=""
28
+ wav_path=""
29
+ mask_prob=0.0
30
+ mask_length=10
31
+
32
+ feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(model_path)
33
+ model = Wav2Vec2Model.from_pretrained(model_path)
34
+
35
+ # for pretrain: Wav2Vec2ForPreTraining
36
+ # model = Wav2Vec2ForPreTraining.from_pretrained(model_path)
37
+
38
+ model = model.to(device)
39
+ model = model.half()
40
+ model.eval()
41
+
42
+ wav, sr = sf.read(wav_path)
43
+ input_values = feature_extractor(wav, return_tensors="pt").input_values
44
+ input_values = input_values.half()
45
+ input_values = input_values.to(device)
46
+
47
+ # for Wav2Vec2ForPreTraining
48
+ # batch_size, raw_sequence_length = input_values.shape
49
+ # sequence_length = model._get_feat_extract_output_lengths(raw_sequence_length)
50
+ # mask_time_indices = _compute_mask_indices((batch_size, sequence_length), mask_prob=0.0, mask_length=2)
51
+ # mask_time_indices = torch.tensor(mask_time_indices, device=input_values.device, dtype=torch.long)
52
+
53
+ with torch.no_grad():
54
+ outputs = model(input_values)
55
+ last_hidden_state = outputs.last_hidden_state
56
+
57
+ # for Wav2Vec2ForPreTraining
58
+ # outputs = model(input_values, mask_time_indices=mask_time_indices, output_hidden_states=True)
59
+ # last_hidden_state = outputs.hidden_states[-1]
60
+
61
+ ```