attong39 commited on
Commit
f523f14
·
verified ·
1 Parent(s): 483aeb5

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +2 -0
  2. Custom Resolutions Instructions.txt +16 -0
  3. Dockerfile +92 -0
  4. LICENSE.txt +46 -0
  5. README.md +304 -10
  6. defaults/ReadMe.txt +13 -0
  7. defaults/ace_step_v1.json +19 -0
  8. defaults/alpha.json +19 -0
  9. defaults/alpha2.json +19 -0
  10. defaults/alpha2_sf.json +18 -0
  11. defaults/alpha_sf.json +17 -0
  12. defaults/animate.json +17 -0
  13. defaults/chatterbox.json +18 -0
  14. defaults/chrono_edit.json +13 -0
  15. defaults/chrono_edit_distill.json +16 -0
  16. defaults/fantasy.json +11 -0
  17. defaults/flf2v_720p.json +16 -0
  18. defaults/flux.json +15 -0
  19. defaults/flux2_dev.json +16 -0
  20. defaults/flux2_dev_nvfp4.json +15 -0
  21. defaults/flux2_klein_4b.json +16 -0
  22. defaults/flux2_klein_9b.json +16 -0
  23. defaults/flux_chroma.json +17 -0
  24. defaults/flux_chroma_radiance.json +17 -0
  25. defaults/flux_dev_kontext.json +16 -0
  26. defaults/flux_dev_kontext_dreamomni2.json +19 -0
  27. defaults/flux_dev_umo.json +23 -0
  28. defaults/flux_dev_uso.json +16 -0
  29. defaults/flux_krea.json +15 -0
  30. defaults/flux_schnell.json +16 -0
  31. defaults/flux_srpo.json +14 -0
  32. defaults/flux_srpo_uso.json +16 -0
  33. defaults/fun_inp.json +13 -0
  34. defaults/fun_inp_1.3B.json +11 -0
  35. defaults/heartmula_oss_3b.json +14 -0
  36. defaults/heartmula_rl_oss_3b_20260123.json +15 -0
  37. defaults/hunyuan.json +12 -0
  38. defaults/hunyuan_1_5_480_i2v.json +17 -0
  39. defaults/hunyuan_1_5_480_i2v_step_distilled.json +18 -0
  40. defaults/hunyuan_1_5_480_t2v.json +16 -0
  41. defaults/hunyuan_1_5_480_t2v_lightx2v.json +17 -0
  42. defaults/hunyuan_1_5_i2v.json +17 -0
  43. defaults/hunyuan_1_5_t2v.json +16 -0
  44. defaults/hunyuan_1_5_upsampler.json +22 -0
  45. defaults/hunyuan_1_5_upsampler_1080.json +22 -0
  46. defaults/hunyuan_avatar.json +12 -0
  47. defaults/hunyuan_custom.json +12 -0
  48. defaults/hunyuan_custom_audio.json +12 -0
  49. defaults/hunyuan_custom_edit.json +12 -0
  50. defaults/hunyuan_i2v.json +12 -0
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ preprocessing/matanyone/tutorial_multi_targets.mp4 filter=lfs diff=lfs merge=lfs -text
37
+ preprocessing/matanyone/tutorial_single_target.mp4 filter=lfs diff=lfs merge=lfs -text
Custom Resolutions Instructions.txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ You can override the choice of Resolutions offered by WanGP, if you create a file "resolutions.json" in the main WanGP folder.
2
+ This file is composed of a list of 2-element sublists. Each 2-element sublist should have the format ["Label", "WxH"] where W, H are respectively the Width and Height of the resolution. Please make sure that W and H are multiples of 16. The letter "x" should be placed in between these two dimensions.
3
+
4
+ Here is below a sample "resolutions.json" file :
5
+
6
+ [
7
+ ["1280x720 (16:9, 720p)", "1280x720"],
8
+ ["720x1280 (9:16, 720p)", "720x1280"],
9
+ ["1024x1024 (1:1, 720p)", "1024x1024"],
10
+ ["1280x544 (21:9, 720p)", "1280x544"],
11
+ ["544x1280 (9:21, 720p)", "544x1280"],
12
+ ["1104x832 (4:3, 720p)", "1104x832"],
13
+ ["832x1104 (3:4, 720p)", "832x1104"],
14
+ ["960x960 (1:1, 720p)", "960x960"],
15
+ ["832x480 (16:9, 480p)", "832x480"]
16
+ ]
Dockerfile ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04
2
+
3
+ # Build arg for GPU architectures - specify which CUDA compute capabilities to compile for
4
+ # Common values:
5
+ # 7.0 - Tesla V100
6
+ # 7.5 - RTX 2060, 2070, 2080, Titan RTX
7
+ # 8.0 - A100, A800 (Ampere data center)
8
+ # 8.6 - RTX 3060, 3070, 3080, 3090 (Ampere consumer)
9
+ # 8.9 - RTX 4070, 4080, 4090 (Ada Lovelace)
10
+ # 9.0 - H100, H800 (Hopper data center)
11
+ # 12.0 - RTX 5070, 5080, 5090 (Blackwell) - Note: sm_120 architecture
12
+ #
13
+ # Examples:
14
+ # RTX 3060: --build-arg CUDA_ARCHITECTURES="8.6"
15
+ # RTX 4090: --build-arg CUDA_ARCHITECTURES="8.9"
16
+ # Multiple: --build-arg CUDA_ARCHITECTURES="8.0;8.6;8.9"
17
+ #
18
+ # Note: Including 8.9 or 9.0 may cause compilation issues on some setups
19
+ # Default includes 8.0 and 8.6 for broad Ampere compatibility
20
+ ARG CUDA_ARCHITECTURES="8.0;8.6"
21
+
22
+ ENV DEBIAN_FRONTEND=noninteractive
23
+
24
+ # Install system dependencies
25
+ RUN apt update && \
26
+ apt install -y \
27
+ python3 python3-pip git wget curl cmake ninja-build \
28
+ libgl1 libglib2.0-0 ffmpeg && \
29
+ apt clean
30
+
31
+ WORKDIR /workspace
32
+
33
+ COPY requirements.txt .
34
+
35
+ # Upgrade pip first
36
+ RUN pip install --upgrade pip setuptools wheel
37
+
38
+ # Install requirements if exists
39
+ RUN pip install -r requirements.txt
40
+
41
+ # Install PyTorch with CUDA support
42
+ RUN pip install --extra-index-url https://download.pytorch.org/whl/cu124 \
43
+ torch==2.6.0+cu124 torchvision==0.21.0+cu124
44
+
45
+ # Install SageAttention from git (patch GPU detection)
46
+ ENV TORCH_CUDA_ARCH_LIST="${CUDA_ARCHITECTURES}"
47
+ ENV FORCE_CUDA="1"
48
+ ENV MAX_JOBS="1"
49
+
50
+ COPY <<EOF /tmp/patch_setup.py
51
+ import os
52
+ with open('setup.py', 'r') as f:
53
+ content = f.read()
54
+
55
+ # Get architectures from environment variable
56
+ arch_list = os.environ.get('TORCH_CUDA_ARCH_LIST')
57
+ arch_set = '{' + ', '.join([f'"{arch}"' for arch in arch_list.split(';')]) + '}'
58
+
59
+ # Replace the GPU detection section
60
+ old_section = '''compute_capabilities = set()
61
+ device_count = torch.cuda.device_count()
62
+ for i in range(device_count):
63
+ major, minor = torch.cuda.get_device_capability(i)
64
+ if major < 8:
65
+ warnings.warn(f"skipping GPU {i} with compute capability {major}.{minor}")
66
+ continue
67
+ compute_capabilities.add(f"{major}.{minor}")'''
68
+
69
+ new_section = 'compute_capabilities = ' + arch_set + '''
70
+ print(f"Manually set compute capabilities: {compute_capabilities}")'''
71
+
72
+ content = content.replace(old_section, new_section)
73
+
74
+ with open('setup.py', 'w') as f:
75
+ f.write(content)
76
+ EOF
77
+
78
+ RUN git clone https://github.com/thu-ml/SageAttention.git /tmp/sageattention && \
79
+ cd /tmp/sageattention && \
80
+ python3 /tmp/patch_setup.py && \
81
+ pip install --no-build-isolation .
82
+
83
+ RUN useradd -u 1000 -ms /bin/bash user
84
+
85
+ RUN chown -R user:user /workspace
86
+
87
+ RUN mkdir /home/user/.cache && \
88
+ chown -R user:user /home/user/.cache
89
+
90
+ COPY entrypoint.sh /workspace/entrypoint.sh
91
+
92
+ ENTRYPOINT ["/workspace/entrypoint.sh"]
LICENSE.txt ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ WanGP NON-COMMERCIAL EVALUATION LICENSE 1.0
2
+
3
+ Definitions
4
+ 1.1 “Software” means the source code, binaries, libraries, utilities and UI released under this license.
5
+ 1.2 “Output” means images, videos or other media produced by running the Software.
6
+ 1.3 “Commercial Use” means:
7
+ a) selling, sublicensing, renting, leasing, or otherwise distributing the Software, in whole or in part, for a fee or other consideration; or
8
+ b) offering the Software (or any derivative) as part of a paid product or hosted service; or
9
+ c) using the Software (or any derivative) to provide cloud-based or backend services, where end users access or pay for those services.
10
+
11
+ License Grant
12
+ Subject to Section 3:
13
+ a) You are granted a worldwide, non-exclusive, royalty-free, revocable license to use, reproduce, modify and distribute the Software for non-commercial purposes only.
14
+ b) You are granted a worldwide, non-exclusive, royalty-free, irrevocable license to use, reproduce, modify and distribute the Output for any purpose, including commercial sale, provided that any commercial distribution of the Output includes a clear notice that the Output was produced (in whole or in part) using WanGP, along with a hyperlink to the WanGP application’s About tab or repository.
15
+
16
+ Restrictions
17
+ 3.1 You MAY NOT distribute, sublicense or otherwise make available the Software (or any derivative) for Commercial Use.
18
+ 3.2 You MAY sell, license or otherwise commercially exploit the Output without restriction.
19
+ 3.3 If you wish to use the Software for Commercial Use, you must obtain a separate commercial license from the Licensor.
20
+
21
+ Third-Party Components 4.1 The Software includes components licensed under various open-source licenses (e.g., Apache 2.0, MIT, BSD). 4.2 You must comply with all applicable terms of those third-party licenses, including preservation of copyright notices, inclusion of required license texts, and patent-grant provisions. 4.3 You can find the full text of each third-party license via the “About” tab in the WanGP application, which provides links to their original GitHub repositories.
22
+
23
+ Attribution
24
+ 5.1 You must give appropriate credit by including:
25
+ • a copy of this license (or a link to it), and
26
+ • a notice that your use is based on “WanGP”.
27
+ 5.2 You may do so in any reasonable manner, but not in any way that suggests the Licensor endorses you or your use.
28
+
29
+ Disclaimer of Warranty & Liability
30
+ THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED.
31
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE.
32
+
33
+ Commercial Licensing The Licensor may offer commercial licenses for the Software, which grant rights to use the Software for Commercial Use. Please contact [deepbeepmeep@yahoo.com] for terms and pricing.
34
+
35
+ Effective Date & Previous Versions
36
+ 8.1 This license is effective as of the date the LICENSE file is updated in the WanGP repository.
37
+ 8.2 Any copies of the Software obtained under prior license terms before this Effective Date remain governed by those prior terms; such granted rights are irrevocable.
38
+ 8.3 Use of the Software after the release of any subsequent version by the Licensor is subject to the terms of the then-current license, unless a separate agreement is in place.
39
+
40
+ Acceptable Use / Moral Clause
41
+ 9.1 You MAY NOT use the Software or the Output to facilitate or produce content that is illegal, harmful, violent, harassing, defamatory, fraudulent, or otherwise violates applicable laws or fundamental human rights.
42
+ 9.2 You MAY NOT deploy the Software or Output in contexts that promote hate speech, extremist ideology, human rights abuses, or other actions that could foreseeably cause significant harm to individuals or groups.
43
+ 9.3 The Licensor reserves the right to terminate the rights granted under this license if a licensee materially breaches this Acceptable Use clause.
44
+
45
+ END OF LICENSE
46
+
README.md CHANGED
@@ -1,10 +1,304 @@
1
- ---
2
- title: Wan2GP
3
- emoji:
4
- colorFrom: gray
5
- colorTo: red
6
- sdk: docker
7
- pinned: false
8
- ---
9
-
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Wan2GP
3
+ emoji: 🌖
4
+ colorFrom: indigo
5
+ colorTo: red
6
+ sdk: docker
7
+ app_port: 7860
8
+ ---
9
+
10
+ # WanGP
11
+
12
+ -----
13
+ <p align="center">
14
+ <b>WanGP by DeepBeepMeep : The best Open Source Video Generative Models Accessible to the GPU Poor</b>
15
+ </p>
16
+
17
+ WanGP supports the Wan (and derived models) but also Hunyuan Video, Flux, Qwen, Z-Image, LongCat, Kandinsky, LTX 1 & 2, Qwen3 TTS, Chatterbox, HearMula, ... with:
18
+ - Low VRAM requirements (as low as 6 GB of VRAM is sufficient for certain models)
19
+ - Support for old Nvidia GPUs (RTX 10XX, 20xx, ...)
20
+ - Support for AMD GPUs Radeon RX 76XX, 77XX, 78XX & 79XX, instructions in the Installation Section Below.
21
+ - Very Fast on the latest GPUs
22
+ - Easy to use Full Web based interface
23
+ - Support for many checkpoint Quantized formats: int8, fp8, gguf, NV FP4, Nunchaku
24
+ - Auto download of the required model adapted to your specific architecture
25
+ - Tools integrated to facilitate Video Generation : Mask Editor, Prompt Enhancer, Temporal and Spatial Generation, MMAudio, Video Browser, Pose / Depth / Flow extractor, Motion Designer
26
+ - Plenty of ready to use Plug Ins: Gallery Browser, Upscaler, Models/Checkpoints Manager, CivitAI browser and downloader, ...
27
+ - Loras Support to customize each model
28
+ - Queuing system : make your shopping list of videos to generate and come back later
29
+ - Headless mode: launch the generation of multiple image / videos / audio files using a command line
30
+
31
+ **Discord Server to get Help from the WanGP Community and show your Best Gens:** https://discord.gg/g7efUW9jGV
32
+
33
+ **Follow DeepBeepMeep on Twitter/X to get the Latest News**: https://x.com/deepbeepmeep
34
+
35
+ ## 📋 Table of Contents
36
+
37
+ - [🚀 Quick Start](#-quick-start)
38
+ - [📦 Installation](#-installation)
39
+ - [🎯 Usage](#-usage)
40
+ - [📚 Documentation](#-documentation)
41
+ - [🔗 Related Projects](#-related-projects)
42
+
43
+
44
+ ## 🔥 Latest Updates :
45
+
46
+ ### January 29th 2026: WanGP v10.56, Music for your Hearts
47
+
48
+ WanGP Special *TTS* (Text To Speech) Release:
49
+
50
+ - **Heart Mula**: *Suno* quality song with lyrics on your local PC. You can generate up to 4 min of music.
51
+
52
+ - **Ace Step v1**: while waiting for *Ace Step v1.5* (which should be released very soon), enjoy this oldie (2025!) but goodie song generator as an appetizer. Ace Step v1 is a very fast Song generator. It is *Diffusion* based, so don't hesitate to turn on Profile 4 to go as low as 4 GB of VRAM while remaining fast.
53
+
54
+ - **Qwen 3 TTS**: you can either do *Voice Cloning*, *Generate a Custom Voice based on a Prompt* or use a *Predefined Voice*
55
+
56
+ - **TTS Features**:
57
+ - **Early stop** : you can abort a gen, while still keeping what has been generated (will work only for TTS models which are *Autoregressive Models*, no need to ask that for Image/Video gens which are *Diffusion Models*)
58
+ - **Specialized Prompt Enhancers**: if you enter the prompt in Heart Mula *"a song about AI generation"*, *WanGP Prompt Enhancer* will generate the corresponding masterpiece for you. Likewise you can enhance "A speech about AI generation" when using Qwen3 TTS or ChatterBox.
59
+ - **Custom Output folder for Audio Gens**: you can now choose a different folder for the *Audio Outputs*
60
+ - **Default Memory Profile for Audio Models**: TTS models can get very slow if you use profile 4 (being autoregressive models, they will need to load all the layers one by one to generate one single audio token then rinse & repeat). On the other hand, they don't need as much VRAM, so you can now define a more aggressive profile (3+ for instance)
61
+
62
+ - **Z Image Base**: try it if you are into the *Z Image* hype but it will be probably useless for you unless you are a researcher and / or want to build a finetune out of it. This model requires from 35 to 50 steps (4x to 6x slower than *Z Image turbo*) and cfg > 1 (an additional 2x slower) and there is no *Reinforcement Learning* so Output Images wont be as good. The plus side is a higher diversity and *Native Negative Prompt* (versus Z Image virtual Negative Prompt using *NAG*).
63
+
64
+ Note that Z Image Base is very sensitive to the *Attention Mode*: it is not compatible with *Sage 1* as it produces black frames. So I have disabled Sage for RTX 30xx. Also there are reports it produces some vertical banding artifacts with *Sage 2*
65
+
66
+ - **Flux 1/2 NAG** : *Flux 2 Klein* is your new best friend but you miss *Negative Prompts*, *NAG* support for Distilled models will make you best buddies forever as NAG simulates Negative prompts.
67
+
68
+ - **Various Improvements**:
69
+ - Video /Audio Galleries now support deletions of gens done outside WanGP
70
+ - added *MP3 support* for audio outputs
71
+ - *Check for Updates* button for *Plugins* to see in a glance if any of your plugin can be updated
72
+ - *Prompt Enhancer* generates a different enhanced prompt each time you click on it. You can define in the config tab its gen parameters (top k, temperature)
73
+ - New *Root Loras* folder can be defined in the config Tab. Useful if you have multiple WanGP instances or want to store easily all your loras in a different hard drive
74
+ - added new setting *Attention Mode Override* in the *Misc* tab
75
+ - Experimental: allowed changing *Configuration* during a *Generation*
76
+
77
+ *update 10.51*: new Heart Mula Finetune better at following instructions, Extra settings (cfg, top k) for TTS models, Rife v4\
78
+ *update 10.52*: updated plugin list and added version tracking\
79
+ *update 10.53*: video/audio galleries now support deletions\
80
+ *update 10.54*: added Z Image Base, prompt enhancers improvements, configurable loras root folder\
81
+ *update 10.55*: blocked Sage with Z Image on RTX30xx and added override attention mode settings, allowed changing config during generation\
82
+ *update 10.56*: added NAG for Flux 1/2 & Ace Step v1
83
+
84
+ ### January 20th 2026: WanGP v10.43, The Cost Saver
85
+ *GPUs are expensive, RAM is expensive, SSDs are expensive; sadly we now live in a GPU & RAM poor world.*
86
+
87
+ WanGP comes again to the rescue:
88
+
89
+ - **GGUF support**: as some of you know, I am not a big fan of this format because when used with image / video generative models we don't get any speed boost (matrices multiplications are still done at 16 bits), VRAM savings are small and quality is worse than with int8/fp8. Still gguf has one advantage: it consumes less RAM and harddrive space. So enjoy gguf support. I have added ready to use *Kijai gguf finetunes* for *LTX 2*.
90
+
91
+ - **Models Manager PlugIn**: use this *Plugin* to identify how much space is taken by each *model* / *finetune* and delete the ones you no longer use. Try to avoid deleting shared files otherwise they will be downloaded again.
92
+
93
+ - **LTX 2 Dual Video & Audio Control**: you no longer need to extract the audio track of a *Control Video* if you want to use it as well to drive the video generation. New mode will allow you to use both motion and audio from Video Control.
94
+
95
+ - **LTX 2 - Custom VAE URL**: some users have asked if they could use the old *Distiller VAE* instead of the new one. To do that, create a *finetune* def based on an existing model definition and save it in the *finetunes/* folder with this entry (check the *docs/FINETUNES.md* doc):
96
+ ```
97
+ "VAE_URLs": ["https://huggingface.co/DeepBeepMeep/LTX-2/resolve/main/ltx-2-19b_vae_old.safetensors"]
98
+ ```
99
+
100
+ - **Flux 2 Klein 4B & 9B**: try these distilled models as fast as Z_Image if not faster but with out of the box image editing capabilities
101
+
102
+ - **Flux 2 & Qwen Outpainting + Lanpaint**: the inpaint mode of these models support now *outpainting* + more combination possible with *Lanpaint*
103
+
104
+ - **RAM Optimizations for multi minutes Videos**: processing, saving, spatial & Temporal upsampling very long videos should require much less RAM.
105
+
106
+ - **Text Encoder Cache**: if you are asking a Text prompt already used recently with the current model, it will be taken straight from a cache. The cache is optimized to consume little RAM. It wont work with certain models such as Qwen where the Text Prompt is combined internally with an Image.
107
+
108
+ *update 10.41*: added Flux 2 klein\
109
+ *update 10.42*: added RAM optimizations & Text Encoder Cache\
110
+ *update 10.43*: added outpainting for Qwen & Flux 2, Lanpaint for Flux 2
111
+
112
+ ### January 15th 2026: WanGP v10.30, The Need for Speed ...
113
+
114
+ - **LTX Distilled VAE Upgrade**: *Kijai* has observed that the Distilled VAE produces images that were less sharp than those of the VAE of the Non Distilled model. I have used this as an opportunity to repackage all the LTX 2 checkpoints and reduce their overall HD footprint since they all share around 5GB.
115
+
116
+ **So dont be surprised if the old checkpoints are deleted and new are downloaded !!!**.
117
+
118
+ - **LTX2 Multi Passes Loras multipliers**: *LTX2* now supports loras multipliers that depend on the Pass No. For instance "1;0.5" means 1 will be the strength for the first LTX2 pass and 0.5 will be the strength for the second pass.
119
+
120
+ - **New Profile 3.5**: here is the lost kid of *Profile 3* & *Profile 5*, you got tons of VRAM, but little RAM ? Profile 3.5 will be your new friend as it will no longer use Reserved RAM to accelerate transfers. Use Profile 3.5 only if you can fit entirely a *Diffusion / Transformer* model in VRAM, otherwise the gen may be much slower.
121
+
122
+ - **NVFP4 Quantization for LTX 2 & Flux 2**: you will now be able to load *NV FP4* model checkpoints in WanGP. On top of *Wan NV4* which was added recently, we now have *LTX 2 (non distilled)* & *Flux 2* support. NV FP4 uses slightly less VRAM and up to 30% less RAM.
123
+
124
+ To enjoy fully the NV FP4 checkpoints (**at least 30% faster gens**), you will need a RTX 50xx and to upgrade to *Pytorch 2.9.1 / Cuda 13* with the latest version of *lightx2v kernels* (check *docs/INSTALLATION.md*). To observe the speed gain, you have to make sure the workload is quite high (high res, long video).
125
+
126
+
127
+ ### January 13th 2026: WanGP v10.24, When there is no VRAM left there is still some VRAM left ...
128
+
129
+ - **LTX 2 - SUPER VRAM OPTIMIZATIONS**
130
+
131
+ *With WanGP 10.21, HD 720p Video Gens of 10s now need just 8GB of VRAM!*
132
+
133
+ LTX Team said this video gen was for 4k. So I had no choice but to squeeze more VRAM with further optimizations.
134
+
135
+ After much suffering I have managed to reduce by at least 1/3 the VRAM requirements of LTX 2, which means:
136
+ - 10s at 720p can be done with only 8GB of VRAM
137
+ - 10s at 1080p with only 12 GB of VRAM
138
+ - 20s at 1080p with only 16 GB of VRAM
139
+ - 10s at Full 4k (3840 x 2176 !!!) with 24 GB of VRAM. However the bad news is LTX 2 video is not for 4K, as 4K outputs may give you nightmares ...
140
+
141
+ 3K/4K resolutions will be available only if you enable them in the *Config* / *General* tab.
142
+
143
+ - **Ic Loras support**: Use a *Control Video* to transfer *Pose*, *Depth*, *Canny Edges*. I have added some extra tweaks: with WanGP you can restrict the transfer to a *masked area*, define a *denoising strength* (how much the control video is going to be followed) and a *masking strength* (how much unmasked area is impacted)
144
+
145
+ - **Start Image Strength**: This new slider will appear below a *Start Image* or Source *Video*. If you set it to values lower than 1, you may reduce the static image effect you sometimes get with LTX2 i2v
146
+
147
+ - **Custom Gemma Text Encoder for LTX 2**: As a practical case, the *Heretic* text encoder is now supported by WanGP. Check the *finetune* doc, but in short create a *finetune* that has a *text_encoder_URLS* key that contains a list of one or more file paths or URLs.
148
+
149
+ - **Experimental Auto Recovery Failed Lora Pin**: Some users (usually with PCs with less than 64 GB of RAM) have reported Out Of Memory although a model seemed to load just fine when starting a gen with Loras. This is sometimes related to WanGP attempting (and failing due to insufficient reserved RAM) to pin the Loras to Reserved Memory for faster gen. I have experimented with a recovery mode that should release sufficient resources to continue the Video Gen. This may solve the oom crashes with *LTX2 Default (non distilled)*
150
+
151
+ - **Max Loras Pinned Slider**: If the Auto Recovery Mode is still not sufficient, I have added a Slider at the bottom of the *Configuration* / *Performance* tab that you can use to prevent WanGP from Pinning Loras (to do so set it to 0). As if there is no loading attempt there won't be any crash...
152
+
153
+ *update 10.21*: added slider Loras Max Pinning slider\
154
+ *update 10.22*: added support for custom Ltx2 Text Encoder + Auto Recovery mode if Lora Pinning failed\
155
+ *update 10.23*: Fixed text prompt ignore in profile 1 & 2 (this created random output videos)
156
+
157
+ ### January 9th 2026: WanGP v10.11, Spoiled again
158
+
159
+ - **LTX 2**: here is the long awaited *Ovi Challenger*, LTX-2 generates video and an audio soundtrack. As usual this WanGP version is *low VRAM*. You should be able to run it with as low as 10 GB of VRAM. If you have at least 24 GB of VRAM you will be able to generate 20s at 720p in a single window in only 2 minutes with the distilled model. WanGP LTX 2 version supports on day one, *Start/End keyframes*, *Sliding-Window* / *Video Continuation* and *Generation Preview*. A *LTX 2 distilled* is part of the package for a very fast generation.
160
+
161
+ With WanGP v10.11 you can now force your soundtrack, it works like *Multitalk* / *Avatar* except in theory it should work with any kind of sound (not just vocals). Thanks to *Kijai* for showing it was possible.
162
+
163
+ - **Z Image Twin Folder Turbo**: Z Image even faster as this variant can generate images with as little as 1 step (3 steps recommended)
164
+
165
+ - **Qwen LanPaint**: very precise *In Painting*, offers a better integration of the inpainted area in the rest of the image. Beware it is up to 5x slower as it "searches" for the best replacement.
166
+
167
+ - **Optimized Pytorch Compiler** : *Patience is the Mother of Virtue*. Finally I may (or may not) have fixed the PyTorch compiler with the Wan models. It should work in much diverse situations and takes much less time.
168
+
169
+ - **LongCat Video**: experimental support which includes *LongCat Avatar*, a talking head model. For the moment it is mostly for model collectors as it is very slow. It needs 40+ steps and each step contains up to 3 passes.
170
+
171
+ - **MMaudio NSFW**: for alternative audio background
172
+
173
+ *update v10.11*: LTX 2, use your own soundtrack
174
+
175
+
176
+
177
+
178
+ See full changelog: **[Changelog](docs/CHANGELOG.md)**
179
+
180
+
181
+ ## 🚀 Quick Start
182
+
183
+ **One-click installation:**
184
+ Get started instantly with [Pinokio App](https://pinokio.computer/)\
185
+ It is recommended to use in Pinokio the Community Scripts *wan2gp* or *wan2gp-amd* by **Morpheus** rather than the official Pinokio install.
186
+
187
+
188
+ **Manual installation:**
189
+ ```bash
190
+ git clone https://github.com/deepbeepmeep/Wan2GP.git
191
+ cd Wan2GP
192
+ conda create -n wan2gp python=3.10.9
193
+ conda activate wan2gp
194
+ pip install torch==2.7.1 torchvision torchaudio --index-url https://download.pytorch.org/whl/test/cu128
195
+ pip install -r requirements.txt
196
+ ```
197
+
198
+ **Run the application:**
199
+ ```bash
200
+ python wgp.py
201
+ ```
202
+
203
+ First time using WanGP ? Just check the *Guides* tab, and you will find a selection of recommended models to use.
204
+
205
+ **Update the application:**
206
+ If using Pinokio use Pinokio to update otherwise:
207
+ Get in the directory where WanGP is installed and:
208
+ ```bash
209
+ git pull
210
+ conda activate wan2gp
211
+ pip install -r requirements.txt
212
+ ```
213
+
214
+ if you get some error messages related to git, you may try the following (beware this will overwrite local changes made to the source code of WanGP):
215
+ ```bash
216
+ git fetch origin && git reset --hard origin/main
217
+ conda activate wan2gp
218
+ pip install -r requirements.txt
219
+ ```
220
+
221
+ **Run headless (batch processing):**
222
+
223
+ Process saved queues without launching the web UI:
224
+ ```bash
225
+ # Process a saved queue
226
+ python wgp.py --process my_queue.zip
227
+ ```
228
+ Create your queue in the web UI, save it with "Save Queue", then process it headless. See [CLI Documentation](docs/CLI.md) for details.
229
+
230
+ ## 🐳 Docker:
231
+
232
+ **For Debian-based systems (Ubuntu, Debian, etc.):**
233
+
234
+ ```bash
235
+ ./run-docker-cuda-deb.sh
236
+ ```
237
+
238
+ This automated script will:
239
+
240
+ - Detect your GPU model and VRAM automatically
241
+ - Select optimal CUDA architecture for your GPU
242
+ - Install NVIDIA Docker runtime if needed
243
+ - Build a Docker image with all dependencies
244
+ - Run WanGP with optimal settings for your hardware
245
+
246
+ **Docker environment includes:**
247
+
248
+ - NVIDIA CUDA 12.4.1 with cuDNN support
249
+ - PyTorch 2.6.0 with CUDA 12.4 support
250
+ - SageAttention compiled for your specific GPU architecture
251
+ - Optimized environment variables for performance (TF32, threading, etc.)
252
+ - Automatic cache directory mounting for faster subsequent runs
253
+ - Current directory mounted in container - all downloaded models, loras, generated videos and files are saved locally
254
+
255
+ **Supported GPUs:** RTX 40XX, RTX 30XX, RTX 20XX, GTX 16XX, GTX 10XX, Tesla V100, A100, H100, and more.
256
+
257
+ ## 📦 Installation
258
+
259
+ ### Nvidia
260
+ For detailed installation instructions for different GPU generations:
261
+ - **[Installation Guide](docs/INSTALLATION.md)** - Complete setup instructions for RTX 10XX to RTX 50XX
262
+
263
+ ### AMD
264
+ For detailed installation instructions for different GPU generations:
265
+ - **[Installation Guide](docs/AMD-INSTALLATION.md)** - Complete setup instructions for Radeon RX 76XX, 77XX, 78XX & 79XX
266
+
267
+ ## 🎯 Usage
268
+
269
+ ### Basic Usage
270
+ - **[Getting Started Guide](docs/GETTING_STARTED.md)** - First steps and basic usage
271
+ - **[Models Overview](docs/MODELS.md)** - Available models and their capabilities
272
+
273
+ ### Advanced Features
274
+ - **[Loras Guide](docs/LORAS.md)** - Using and managing Loras for customization
275
+ - **[Finetunes](docs/FINETUNES.md)** - Add manually new models to WanGP
276
+ - **[VACE ControlNet](docs/VACE.md)** - Advanced video control and manipulation
277
+ - **[Command Line Reference](docs/CLI.md)** - All available command line options
278
+
279
+ ## 📚 Documentation
280
+
281
+ - **[Changelog](docs/CHANGELOG.md)** - Latest updates and version history
282
+ - **[Troubleshooting](docs/TROUBLESHOOTING.md)** - Common issues and solutions
283
+
284
+ ## 📚 Video Guides
285
+ - Nice Video that explain how to use Vace:\
286
+ https://www.youtube.com/watch?v=FMo9oN2EAvE
287
+ - Another Vace guide:\
288
+ https://www.youtube.com/watch?v=T5jNiEhf9xk
289
+
290
+ ## 🔗 Related Projects
291
+
292
+ ### Other Models for the GPU Poor
293
+ - **[HuanyuanVideoGP](https://github.com/deepbeepmeep/HunyuanVideoGP)** - One of the best open source Text to Video generators
294
+ - **[Hunyuan3D-2GP](https://github.com/deepbeepmeep/Hunyuan3D-2GP)** - Image to 3D and text to 3D tool
295
+ - **[FluxFillGP](https://github.com/deepbeepmeep/FluxFillGP)** - Inpainting/outpainting tools based on Flux
296
+ - **[Cosmos1GP](https://github.com/deepbeepmeep/Cosmos1GP)** - Text to world generator and image/video to world
297
+ - **[OminiControlGP](https://github.com/deepbeepmeep/OminiControlGP)** - Flux-derived application for object transfer
298
+ - **[YuE GP](https://github.com/deepbeepmeep/YuEGP)** - Song generator with instruments and singer's voice
299
+
300
+ ---
301
+
302
+ <p align="center">
303
+ Made with ❤️ by DeepBeepMeep
304
+ </p>
defaults/ReadMe.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Please do not modify any file in this Folder.
2
+
3
+ If you want to change a property of a default model, copy the corresponding model file in the ./finetunes folder and modify the properties you want to change in the new file.
4
+ If a property is not in the new file, it will be inherited automatically from the default file that matches the same name file.
5
+
6
+ For instance to hide a model:
7
+
8
+ {
9
+ "model":
10
+ {
11
+ "visible": false
12
+ }
13
+ }
defaults/ace_step_v1.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "TTS ACE-Step v1 3.5B",
4
+ "architecture": "ace_step_v1",
5
+ "description": "ACE-Step, a fast open-source foundation diffusion based model for music generation that overcomes key limitations of existing approaches and achieves state-of-the-art performance.",
6
+ "URLs": [
7
+ "https://huggingface.co/DeepBeepMeep/TTS/resolve/main/ace_step_v1_transformer_bf16.safetensors",
8
+ "https://huggingface.co/DeepBeepMeep/TTS/resolve/main/ace_step_v1_transformer_quanto_bf16_int8.safetensors"
9
+ ]
10
+ },
11
+ "prompt": "[Verse]\nNeon rain on the city line\nYou hum the tune and I fall in time\n[Chorus]\nHold me close and keep the time",
12
+ "alt_prompt": "Dreamy synth-pop with shimmering pads, soft vocals, and a slow dance groove.",
13
+ "audio_prompt_type": "",
14
+ "audio_scale": 0.5,
15
+ "duration_seconds": 20,
16
+ "num_inference_steps": 60,
17
+ "guidance_scale": 7.0,
18
+ "scheduler_type": "euler"
19
+ }
defaults/alpha.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model":
3
+ {
4
+ "name": "Wan2.1 Alpha v1.0 14B",
5
+ "architecture" : "alpha",
6
+ "description": "This model successfully generates various scenes with accurate and clearly rendered transparency. Notably, it can synthesize diverse semi-transparent objects, glowing effects, and fine-grained details such as hair. For each video generated you will find a Zip file with the same name that will contain the corresponding RGBA images.",
7
+ "URLs": "t2v",
8
+ "preload_URLs": [
9
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan_alpha_2.1_vae_rgb_channel.safetensors",
10
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan_alpha_2.1_vae_alpha_channel.safetensors"
11
+ ],
12
+ "loras": [
13
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan_alpha_2.1_dora.safetensors"
14
+ ],
15
+ "loras_multipliers": [ 1 ]
16
+ },
17
+ "prompt": "A large orange octopus is seen resting. The background of the video is transparent."
18
+
19
+ }
defaults/alpha2.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model":
3
+ {
4
+ "name": "Wan2.1 Alpha v2.0 14B",
5
+ "architecture" : "alpha2",
6
+ "description": "Wan-Alpha v2.0 generates transparent videos with fine-grained alpha detail (hair, glow, smoke). For each video, a Zip file with RGBA frames is produced.",
7
+ "URLs": "t2v",
8
+ "preload_URLs": [
9
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan_alpha_2.1_vae_rgb_channel_v2.safetensors",
10
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan_alpha_2.1_vae_alpha_channel_v2.safetensors",
11
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/gauss_mask"
12
+ ],
13
+ "loras": [
14
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan_alpha_2.1_dora_v2.safetensors"
15
+ ],
16
+ "loras_multipliers": [ 1 ]
17
+ },
18
+ "prompt": "A large orange octopus is seen resting. The background of the video is transparent."
19
+ }
defaults/alpha2_sf.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model":
3
+ {
4
+ "name": "Wan2.1 Alpha v2.0 Lightning 14B",
5
+ "architecture" : "alpha2",
6
+ "description": "Wan-Alpha v2.0 Lightning with transparent video output and RGBA frames zip.",
7
+ "URLs": "t2v_sf",
8
+ "preload_URLs": "alpha2",
9
+ "loras": "alpha2",
10
+ "loras_multipliers": [ 1 ],
11
+ "profiles_dir" : [""]
12
+ },
13
+ "prompt": "A large orange octopus is seen resting. The background of the video is transparent.",
14
+ "num_inference_steps": 4,
15
+ "guidance_scale": 1,
16
+ "flow_shift": 3
17
+ }
18
+
defaults/alpha_sf.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model":
3
+ {
4
+ "name": "Wan2.1 Alpha v1.0 Lightning 14B",
5
+ "architecture" : "alpha",
6
+ "description": "This model is accelerated by the Lightning / SelfForcing process. It successfully generates various scenes with accurate and clearly rendered transparency. Notably, it can synthesize diverse semi-transparent objects, glowing effects, and fine-grained details such as hair. For each video generated you will find a Zip file with the same name that will contain the corresponding RGBA images.",
7
+ "URLs": "t2v_sf",
8
+ "preload_URLs": "alpha",
9
+ "loras": "alpha",
10
+ "loras_multipliers": [ 1 ],
11
+ "profiles_dir" : [""]
12
+ },
13
+ "prompt": "A large orange octopus is seen resting. The background of the video is transparent.",
14
+ "num_inference_steps": 4,
15
+ "guidance_scale": 1,
16
+ "flow_shift": 3
17
+ }
defaults/animate.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Wan2.2 Animate 14B",
4
+ "architecture": "animate",
5
+ "description": "Wan-Animate takes a video and a character image as input, and generates a video in either 'Animation' or 'Replacement' mode. Sliding Windows of at least 81 frames are recommended to obtain the best Style continuity.",
6
+ "URLs": [
7
+ "https://huggingface.co/DeepBeepMeep/Wan2.2/resolve/main/wan2.2_animate_14B_bf16.safetensors",
8
+ "https://huggingface.co/DeepBeepMeep/Wan2.2/resolve/main/wan2.2_animate_14B_quanto_fp16_int8.safetensors",
9
+ "https://huggingface.co/DeepBeepMeep/Wan2.2/resolve/main/wan2.2_animate_14B_quanto_bf16_int8.safetensors"
10
+ ],
11
+ "preload_URLs" :
12
+ [
13
+ "https://huggingface.co/DeepBeepMeep/Wan2.2/resolve/main/wan2.2_animate_relighting_lora.safetensors"
14
+ ],
15
+ "group": "wan2_2"
16
+ }
17
+ }
defaults/chatterbox.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "TTS Chatterbox Multilingual",
4
+ "architecture": "chatterbox",
5
+ "description": "Resemble AI's open multilingual TTS with language selection via model mode.",
6
+ "URLs": []
7
+ },
8
+ "prompt": "Welcome to Chatterbox !",
9
+ "negative_prompt": "",
10
+ "audio_prompt_type": "A",
11
+ "model_mode": "en",
12
+ "repeat_generation": 1,
13
+ "video_length": 0,
14
+ "num_inference_steps": 0,
15
+ "pace": 0.5,
16
+ "exaggeration": 0.5,
17
+ "temperature": 0.8
18
+ }
defaults/chrono_edit.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Wan2.1 Chrono Edit 14B",
4
+ "architecture": "chrono_edit",
5
+ "description": "This model is an Image Editor that will follow your instructions. It generates internally a video to produce the desired effect on the original image (the result being the End Image). It expects a very specific prompt format. That is why you must absolutely use the Prompt Enhancer that has been tuned for this model.",
6
+ "URLs": [
7
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_chrono_edit_14B_mbf16.safetensors",
8
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_chrono_edit_14B_quanto_mbf16_int8.safetensors",
9
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_chrono_edit_14B_quanto_mfp16_int8.safetensors"
10
+ ]
11
+ },
12
+ "prompt": "Rotate the pose of the woman so that she is facing the right"
13
+ }
defaults/chrono_edit_distill.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Wan2.1 Chrono Edit Distill 14B",
4
+ "architecture": "chrono_edit",
5
+ "description": "This model is an Image Editor that will follow your instructions. It generates internally a video to produce the desired effect on the original image (the result being the End Image). It expects a very specific prompt format. That is why you must absolutely use the Prompt Enhancer that has been tuned for this model. This version is accelerated using the Chrono Distill process.",
6
+ "URLs": "chrono_edit",
7
+ "profiles_dir": [""],
8
+ "loras": ["https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/loras_accelerators/chronoedit_distill_lora.safetensors"],
9
+ "loras_multipliers": [1]
10
+ },
11
+ "prompt": "Rotate the pose of the woman so that she is facing the right",
12
+ "num_inference_steps": 8,
13
+ "flow_shift": 2,
14
+ "guidance_phases": 1,
15
+ "guidance_scale": 1
16
+ }
defaults/fantasy.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model":
3
+ {
4
+ "name": "Fantasy Talking 720p 14B",
5
+ "architecture" : "fantasy",
6
+ "modules": [ ["https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_fantasy_speaking_14B_bf16.safetensors"]],
7
+ "description": "The Fantasy Talking model corresponds to the original Wan image 2 video model combined with the Fantasy Speaking module to process an audio Input.",
8
+ "URLs": "i2v_720p"
9
+ },
10
+ "resolution": "1280x720"
11
+ }
defaults/flf2v_720p.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model":
3
+ {
4
+ "name": "First Last Frame to Video 720p (FLF2V) 14B",
5
+ "architecture" : "flf2v_720p",
6
+ "visible" : true,
7
+ "description": "The First Last Frame 2 Video model is the official Image 2 Video model that supports Start and End frames.",
8
+ "URLs": [
9
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_FLF2V_720p_14B_mbf16.safetensors",
10
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_FLF2V_720p_14B_quanto_mbf16_int8.safetensors",
11
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_FLF2V_720p_14B_quanto_mfp16_int8.safetensors"
12
+ ],
13
+ "auto_quantize": true
14
+ },
15
+ "resolution": "1280x720"
16
+ }
defaults/flux.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Flux 1 Dev 12B",
4
+ "architecture": "flux",
5
+ "description": "FLUX.1 Dev is a 12 billion parameter rectified flow transformer capable of generating images from text descriptions.",
6
+ "URLs": [
7
+ "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-dev_bf16.safetensors",
8
+ "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-dev_quanto_bf16_int8.safetensors"
9
+ ],
10
+ "image_outputs": true
11
+ },
12
+ "prompt": "draw a hat",
13
+ "resolution": "1280x720",
14
+ "batch_size": 1
15
+ }
defaults/flux2_dev.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Flux 2 Dev 32B",
4
+ "architecture": "flux2_dev",
5
+ "description": "FLUX.2 Dev is the latest rectified flow transformer from Black Forest Labs for image generation and editing.",
6
+ "URLs": [
7
+ "https://huggingface.co/DeepBeepMeep/Flux2/resolve/main/flux2-dev.safetensors",
8
+ "https://huggingface.co/DeepBeepMeep/Flux2/resolve/main/flux2-dev_quanto_bf16_int8.safetensors"
9
+ ]
10
+ },
11
+ "prompt": "draw a hat on top of a hat inside a hat",
12
+ "resolution": "1024x1024",
13
+ "batch_size": 1,
14
+ "embedded_guidance_scale": 4,
15
+ "sampling_steps": 30
16
+ }
defaults/flux2_dev_nvfp4.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Flux 2 Dev NVFP4 32B",
4
+ "architecture": "flux2_dev",
5
+ "description": "NVFP4-quantized Flux 2 Dev checkpoint (mixed).",
6
+ "URLs": [
7
+ "https://huggingface.co/DeepBeepMeep/Flux2/resolve/main/flux2-dev-nvfp4-mixed.safetensors"
8
+ ]
9
+ },
10
+ "prompt": "draw a hat on top of a hat inside a hat",
11
+ "resolution": "1024x1024",
12
+ "batch_size": 1,
13
+ "embedded_guidance_scale": 4,
14
+ "sampling_steps": 30
15
+ }
defaults/flux2_klein_4b.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Flux 2 Klein 4B",
4
+ "architecture": "flux2_klein_4b",
5
+ "description": "FLUX.2 Klein 4B is a compact rectified flow transformer for image generation and editing.",
6
+ "URLs": [
7
+ "https://huggingface.co/DeepBeepMeep/Flux2/resolve/main/flux-2-klein-4b.safetensors",
8
+ "https://huggingface.co/DeepBeepMeep/Flux2/resolve/main/flux-2-klein-4b_quanto_bf16_int8.safetensors"
9
+ ]
10
+ },
11
+ "prompt": "a cozy reading nook with warm sunlight, soft textiles, and a cup of tea on a wooden side table",
12
+ "resolution": "1024x1024",
13
+ "batch_size": 1,
14
+ "embedded_guidance_scale": 1,
15
+ "num_inference_steps": 4
16
+ }
defaults/flux2_klein_9b.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Flux 2 Klein 9B",
4
+ "architecture": "flux2_klein_9b",
5
+ "description": "FLUX.2 Klein 9B is a balanced rectified flow transformer for image generation and editing.",
6
+ "URLs": [
7
+ "https://huggingface.co/DeepBeepMeep/Flux2/resolve/main/flux-2-klein-9b.safetensors",
8
+ "https://huggingface.co/DeepBeepMeep/Flux2/resolve/main/flux-2-klein-9b_quanto_bf16_int8.safetensors"
9
+ ]
10
+ },
11
+ "prompt": "a glass greenhouse filled with lush tropical plants, misty air, and dappled light",
12
+ "resolution": "1024x1024",
13
+ "batch_size": 1,
14
+ "embedded_guidance_scale": 1,
15
+ "num_inference_steps": 4
16
+ }
defaults/flux_chroma.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Flux 1 Chroma 1 HD 8.9B",
4
+ "architecture": "flux_chroma",
5
+ "description": "FLUX.1 Chroma is a 8.9 billion parameters model. As a base model, Chroma1 is intentionally designed to be an excellent starting point for finetuning. It provides a strong, neutral foundation for developers, researchers, and artists to create specialized models.",
6
+ "URLs": [
7
+ "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-chroma_hd_bf16.safetensors",
8
+ "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-chroma_hd_quanto_bf16_int8.safetensors"
9
+ ],
10
+ "image_outputs": true
11
+ },
12
+ "prompt": "draw a hat",
13
+ "resolution": "1280x720",
14
+ "guidance_scale": 3.0,
15
+ "num_inference_steps": 20,
16
+ "batch_size": 1
17
+ }
defaults/flux_chroma_radiance.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Flux 1 Chroma Radiance 8.9B",
4
+ "architecture": "flux_chroma_radiance",
5
+ "description": "FLUX.1 Chroma Radiance (20th of October 2025 version) is a 8.9 billion parameters model. As a base model, Chroma Radiance is intentionally designed to be an excellent starting point for finetuning. It provides a strong, neutral foundation for developers, researchers, and artists to create specialized models.",
6
+ "URLs": [
7
+ "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-chroma_radiance_201025_bf16.safetensors",
8
+ "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-chroma_radiance_201025_quanto_bf16_int8.safetensors"
9
+ ],
10
+ "image_outputs": true
11
+ },
12
+ "prompt": "draw a hat",
13
+ "resolution": "1280x720",
14
+ "guidance_scale": 3.0,
15
+ "num_inference_steps": 20,
16
+ "batch_size": 1
17
+ }
defaults/flux_dev_kontext.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Flux 1 Dev Kontext 12B",
4
+ "architecture": "flux_dev_kontext",
5
+ "description": "FLUX.1 Kontext is a 12 billion parameter rectified flow transformer capable of editing images based on instructions stored in the Prompt. Please be aware that Flux Kontext is picky on the resolution of the input image and the output dimensions may not match the dimensions of the input image.",
6
+ "URLs": [
7
+ "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1_kontext_dev_bf16.safetensors",
8
+ "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1_kontext_dev_quanto_bf16_int8.safetensors"
9
+ ]
10
+ },
11
+ "prompt": "add a hat",
12
+ "resolution": "1280x720",
13
+ "batch_size": 1
14
+ }
15
+
16
+
defaults/flux_dev_kontext_dreamomni2.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Flux 1 DreamOmni2 12B",
4
+ "architecture": "flux_dev_kontext_dreamomni2",
5
+ "description": "DreamOmni2 is a Multimodal Instruction-based Editing and Generation Model",
6
+ "URLs": [
7
+ "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1_kontext_dev_bf16.safetensors",
8
+ "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1_kontext_dev_quanto_bf16_int8.safetensors"
9
+ ],
10
+ "preload_URLs": [ "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux_dreamomni2_edit_lora.safetensors",
11
+ "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux_dreamomni2_gen_lora.safetensors"
12
+ ]
13
+ },
14
+ "prompt": "In the scene, the character from the first image stands on the left, and the character from the second image stands on the right. They are shaking hands against the backdrop of a spaceship interior.",
15
+ "resolution": "1280x720",
16
+ "batch_size": 1
17
+ }
18
+
19
+
defaults/flux_dev_umo.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Flux 1 UMO 12B",
4
+ "architecture": "flux_dev_umo",
5
+ "description": "FLUX.1 UMO Dev is a model that can Edit Images with a specialization in combining multiple image references (resized internally at 512x512 max) to produce an Image output. Best Image preservation at 768x768 Resolution Output.",
6
+ "URLs": "flux",
7
+ "loras": ["https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-dev-UMO_dit_lora_bf16.safetensors"],
8
+ "resolutions": [ ["1024x1024 (1:1)", "1024x1024"],
9
+ ["768x1024 (3:4)", "768x1024"],
10
+ ["1024x768 (4:3)", "1024x768"],
11
+ ["512x1024 (1:2)", "512x1024"],
12
+ ["1024x512 (2:1)", "1024x512"],
13
+ ["768x768 (1:1)", "768x768"],
14
+ ["768x512 (3:2)", "768x512"],
15
+ ["512x768 (2:3)", "512x768"]]
16
+ },
17
+ "prompt": "the man is wearing a hat",
18
+ "embedded_guidance_scale": 4,
19
+ "resolution": "768x768",
20
+ "batch_size": 1
21
+ }
22
+
23
+
defaults/flux_dev_uso.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Flux 1 USO Dev 12B",
4
+ "architecture": "flux_dev_uso",
5
+ "description": "FLUX.1 USO Dev is a model that can Edit Images with a specialization in Style Transfers (up to two).",
6
+ "modules": [ ["https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-dev-USO_projector_bf16.safetensors"]],
7
+ "URLs": "flux",
8
+ "loras": ["https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-dev-USO_dit_lora_bf16.safetensors"]
9
+ },
10
+ "prompt": "the man is wearing a hat",
11
+ "embedded_guidance_scale": 4,
12
+ "resolution": "1024x1024",
13
+ "batch_size": 1
14
+ }
15
+
16
+
defaults/flux_krea.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Flux 1 Dev Krea 12B",
4
+ "architecture": "flux",
5
+ "description": "Cutting-edge output quality, with a focus on aesthetic photography.",
6
+ "URLs": [
7
+ "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-krea-dev_bf16.safetensors",
8
+ "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-krea-dev_quanto_bf16_int8.safetensors"
9
+ ],
10
+ "image_outputs": true
11
+ },
12
+ "prompt": "draw a hat",
13
+ "resolution": "1280x720",
14
+ "batch_size": 1
15
+ }
defaults/flux_schnell.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Flux 1 Schnell 12B",
4
+ "architecture": "flux_schnell",
5
+ "description": "FLUX.1 Schnell is a 12 billion parameter rectified flow transformer capable of generating images from text descriptions. As a distilled model it requires fewer denoising steps.",
6
+ "URLs": [
7
+ "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-schnell_bf16.safetensors",
8
+ "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-schnell_quanto_bf16_int8.safetensors"
9
+ ],
10
+ "image_outputs": true
11
+ },
12
+ "prompt": "draw a hat",
13
+ "resolution": "1280x720",
14
+ "num_inference_steps": 10,
15
+ "batch_size": 1
16
+ }
defaults/flux_srpo.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Flux 1 Dev SRPO 12B",
4
+ "architecture": "flux",
5
+ "description": "By fine-tuning the FLUX.1.dev model with optimized denoising and online reward adjustment, SRPO improves its human-evaluated realism and aesthetic quality by over 3x.",
6
+ "URLs": [
7
+ "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-srpo-dev_bf16.safetensors",
8
+ "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-srpo-dev_quanto_bf16_int8.safetensors"
9
+ ]
10
+ },
11
+ "prompt": "draw a hat",
12
+ "resolution": "1024x1024",
13
+ "batch_size": 1
14
+ }
defaults/flux_srpo_uso.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Flux 1 USO SRPO 12B",
4
+ "architecture": "flux_dev_uso",
5
+ "description": "FLUX.1 USO SRPO is a model that can Edit Images with a specialization in Style Transfers (up to two). It leverages the improved Image quality brought by the SRPO process",
6
+ "modules": [ "flux_dev_uso"],
7
+ "URLs": "flux_srpo",
8
+ "loras": "flux_dev_uso"
9
+ },
10
+ "prompt": "the man is wearing a hat",
11
+ "embedded_guidance_scale": 4,
12
+ "resolution": "1024x1024",
13
+ "batch_size": 1
14
+ }
15
+
16
+
defaults/fun_inp.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model":
3
+ {
4
+ "name": "Fun InP image2video 14B",
5
+ "architecture" : "fun_inp",
6
+ "description": "The Fun model is an alternative image 2 video model that supports out of the box End Image fixing (contrary to the original Wan image 2 video model).",
7
+ "URLs": [
8
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_Fun_InP_14B_bf16.safetensors",
9
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_Fun_InP_14B_quanto_int8.safetensors",
10
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_Fun_InP_14B_quanto_fp16_int8.safetensors"
11
+ ]
12
+ }
13
+ }
defaults/fun_inp_1.3B.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model":
3
+ {
4
+ "name": "Fun InP image2video 1.3B",
5
+ "architecture" : "fun_inp_1.3B",
6
+ "description": "The Fun model is an alternative image 2 video model that supports out of the box End Image fixing (contrary to the original Wan image 2 video model). This version also adds image 2 video capability to the 1.3B model.",
7
+ "URLs": [
8
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_Fun_InP_1.3B_bf16.safetensors"
9
+ ]
10
+ }
11
+ }
defaults/heartmula_oss_3b.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "TTS HeartMuLa OSS 3B",
4
+ "architecture": "heartmula_oss_3b",
5
+ "description": "HeartMuLa open music generation conditioned on lyrics and tags.",
6
+ "URLs": [
7
+ "https://huggingface.co/DeepBeepMeep/TTS/resolve/main/heartmula_oss_3b_bf16.safetensors",
8
+ "https://huggingface.co/DeepBeepMeep/TTS/resolve/main/heartmula_oss_3b_quanto_bf16_int8.safetensors"
9
+ ]
10
+ },
11
+ "prompt": "[Verse]\nMorning light through the window pane\nI hum a tune to chase the rain\nSteady steps on a quiet street\nHeart and rhythm, gentle beat",
12
+ "alt_prompt": "piano,happy,wedding",
13
+ "temperature": 1.0
14
+ }
defaults/heartmula_rl_oss_3b_20260123.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "TTS HeartMuLa RL OSS (20260123) 3B",
4
+ "architecture": "heartmula_oss_3b",
5
+ "description": "HeartMuLa RL OSS 3B checkpoint (20260123) with updated codec support. This version should be better at following instructions thanks to a reinforced learning training.",
6
+ "URLs": [
7
+ "https://huggingface.co/DeepBeepMeep/TTS/resolve/main/heartmula_rl_oss_3b_20260123_bf16.safetensors",
8
+ "https://huggingface.co/DeepBeepMeep/TTS/resolve/main/heartmula_rl_oss_3b_20260123_quanto_bf16_int8.safetensors"
9
+ ],
10
+ "heartmula_codec_version": "20260123"
11
+ },
12
+ "prompt": "[Verse]\nMorning light through the window pane\nI hum a tune to chase the rain\nSteady steps on a quiet street\nHeart and rhythm, gentle beat",
13
+ "alt_prompt": "piano,happy,wedding",
14
+ "temperature": 1.0
15
+ }
defaults/hunyuan.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model":
3
+ {
4
+ "name": "Hunyuan Video Text2video 720p 13B",
5
+ "architecture" : "hunyuan",
6
+ "description": "Probably the best text 2 video model available.",
7
+ "URLs": [
8
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_720_bf16.safetensors",
9
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_720_quanto_int8.safetensors"
10
+ ]
11
+ }
12
+ }
defaults/hunyuan_1_5_480_i2v.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Hunyuan Video 1.5 Image2video 480p 8B",
4
+ "architecture": "hunyuan_1_5_i2v",
5
+ "description": "HunyuanVideo-1.5 is a video generation model that delivers top-tier quality with only 8.3B parameters, significantly lowering the barrier to usage. This is the Image2video 480p version.",
6
+ "URLs": [
7
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo1.5/resolve/main/hunyuan_video_1.5_i2v_480_bf16.safetensors",
8
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo1.5/resolve/main/hunyuan_video_1.5_i2v_480_quanto_bf16_int8.safetensors"
9
+ ]
10
+ },
11
+ "prompt" : "",
12
+ "resolution": "832x480",
13
+ "video_length": 97,
14
+ "num_inference_steps": 30,
15
+ "guidance_scale": 6.0,
16
+ "flow_shift": 5.0
17
+ }
defaults/hunyuan_1_5_480_i2v_step_distilled.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Hunyuan Video 1.5 Image2video 480p Step Distilled 8B",
4
+ "architecture": "hunyuan_1_5_i2v",
5
+ "description": "HunyuanVideo-1.5 is a video generation model that delivers top-tier quality with only 8.3B parameters, significantly lowering the barrier to usage. This is the official Step Distilled 480p version by the Hunyuan Team.",
6
+ "profiles_dir": [""],
7
+ "URLs": [
8
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo1.5/resolve/main/hunyuan_video_1.5_i2v_480_step_distilled_quanto_bf16_int8.safetensors",
9
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo1.5/resolve/main/hunyuan_video_1.5_i2v_480_step_distilled_bf16.safetensors"
10
+ ]
11
+ },
12
+ "prompt": "",
13
+ "resolution": "832x480",
14
+ "video_length": 97,
15
+ "num_inference_steps": 8,
16
+ "guidance_scale": 1.0,
17
+ "flow_shift": 5.0
18
+ }
defaults/hunyuan_1_5_480_t2v.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Hunyuan Video 1.5 Text2video 480p 8B",
4
+ "architecture": "hunyuan_1_5_t2v",
5
+ "description": "HunyuanVideo-1.5 is a video generation model that delivers top-tier quality with only 8.3B parameters, significantly lowering the barrier to usage. This is the Text2video 480p version.",
6
+ "URLs": [
7
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo1.5/resolve/main/hunyuan_video_1.5_480_bf16.safetensors",
8
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo1.5/resolve/main/hunyuan_video_1.5_480_quanto_bf16_int8.safetensors"
9
+ ]
10
+ },
11
+ "resolution": "832x480",
12
+ "video_length": 97,
13
+ "num_inference_steps": 30,
14
+ "guidance_scale": 6.0,
15
+ "flow_shift": 5.0
16
+ }
defaults/hunyuan_1_5_480_t2v_lightx2v.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Hunyuan Video 1.5 Text2video 480p Lightx2v 8B",
4
+ "architecture": "hunyuan_1_5_t2v",
5
+ "description": "HunyuanVideo-1.5 is a video generation model that delivers top-tier quality with only 8.3B parameters, significantly lowering the barrier to usage. This is the Text2video 480p version accelerated by lightx2v.",
6
+ "profiles_dir": [""],
7
+ "URLs": [
8
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo1.5/resolve/main/hy1.5_t2v_480p_lightx2v_4step_bf16.safetensors",
9
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo1.5/resolve/main/hy1.5_t2v_480p_lightx2v_4step_quanto_int8_bf16.safetensors"
10
+ ]
11
+ },
12
+ "resolution": "832x480",
13
+ "video_length": 97,
14
+ "num_inference_steps": 4,
15
+ "guidance_scale": 1.0,
16
+ "flow_shift": 8.0
17
+ }
defaults/hunyuan_1_5_i2v.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Hunyuan Video 1.5 Image2video 720p 8B",
4
+ "architecture": "hunyuan_1_5_i2v",
5
+ "description": "HunyuanVideo-1.5 is a video generation model that delivers top-tier quality with only 8.3B parameters, significantly lowering the barrier to usage. This is the Image2video 720p version.",
6
+ "URLs": [
7
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo1.5/resolve/main/hunyuan_video_1.5_i2v_720_bf16.safetensors",
8
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo1.5/resolve/main/hunyuan_video_1.5_i2v_720_quanto_bf16_int8.safetensors"
9
+ ]
10
+ },
11
+ "prompt" : "",
12
+ "resolution": "1280x720",
13
+ "video_length": 97,
14
+ "num_inference_steps": 30,
15
+ "guidance_scale": 6.0,
16
+ "flow_shift": 7.0
17
+ }
defaults/hunyuan_1_5_t2v.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Hunyuan Video 1.5 Text2video 720p 8B",
4
+ "architecture": "hunyuan_1_5_t2v",
5
+ "description": "HunyuanVideo-1.5 is a video generation model that delivers top-tier quality with only 8.3B parameters, significantly lowering the barrier to usage. This is the Text2video 720p version.",
6
+ "URLs": [
7
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo1.5/resolve/main/hunyuan_video_1.5_720_bf16.safetensors",
8
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo1.5/resolve/main/hunyuan_video_1.5_720_quanto_bf16_int8.safetensors"
9
+ ]
10
+ },
11
+ "resolution": "1280x720",
12
+ "video_length": 97,
13
+ "num_inference_steps": 30,
14
+ "guidance_scale": 6.0,
15
+ "flow_shift": 9.0
16
+ }
defaults/hunyuan_1_5_upsampler.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Hunyuan Video 1.5 Upsampler 720p 8B",
4
+ "architecture": "hunyuan_1_5_upsampler",
5
+ "description": "HunyuanVideo-1.5 is a video generation model that delivers top-tier quality with only 8.3B parameters, significantly lowering the barrier to usage. This version is a specialized 720p upsampler.",
6
+ "URLs": [
7
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo1.5/resolve/main/hunyuan_video_1.5_upsampler_720_bf16.safetensors",
8
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo1.5/resolve/main/hunyuan_video_1.5_upsampler_720_quanto_bf16_int8.safetensors"
9
+ ],
10
+ "upsampler": "720",
11
+ "preload_URLs": [
12
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo1.5/resolve/main/hunyuan_video_1.5_upsampler_720p_sr_distilled.safetensors",
13
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo1.5/resolve/main/hunyuan_video_1.5_upsampler_720p_sr_distilled_config.json"
14
+ ]
15
+
16
+ },
17
+ "resolution": "1280x720",
18
+ "video_length": 97,
19
+ "num_inference_steps": 6,
20
+ "guidance_scale": 1,
21
+ "flow_shift": 2.0
22
+ }
defaults/hunyuan_1_5_upsampler_1080.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Hunyuan Video 1.5 Upsampler 1080p 8B",
4
+ "architecture": "hunyuan_1_5_upsampler",
5
+ "description": "HunyuanVideo-1.5 is a video generation model that delivers top-tier quality with only 8.3B parameters, significantly lowering the barrier to usage. This version is a specialized 1080p upsampler.",
6
+ "upsampler": "1080",
7
+ "URLs": [
8
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo1.5/resolve/main/hunyuan_video_1.5_upsampler_1080_bf16.safetensors",
9
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo1.5/resolve/main/hunyuan_video_1.5_upsampler_1080_quanto_bf16_int8.safetensors"
10
+ ],
11
+ "preload_URLs": [
12
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo1.5/resolve/main/hunyuan_video_1.5_upsampler_1080p_sr_distilled.safetensors",
13
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo1.5/resolve/main/hunyuan_video_1.5_upsampler_1080p_sr_distilled_config.json"
14
+ ]
15
+
16
+ },
17
+ "resolution": "1920x1088",
18
+ "video_length": 97,
19
+ "num_inference_steps": 8,
20
+ "guidance_scale": 1,
21
+ "flow_shift": 2.0
22
+ }
defaults/hunyuan_avatar.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model":
3
+ {
4
+ "name": "Hunyuan Video Avatar 720p 13B",
5
+ "architecture" : "hunyuan_avatar",
6
+ "description": "With the Hunyuan Video Avatar model you can animate a person based on the content of an audio input. Please note that the video generator works by processing 128 frames segment at a time (even if you ask less). The good news is that it will concatenate multiple segments for long video generation (max 3 segments recommended as the quality will get worse).",
7
+ "URLs": [
8
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_avatar_720_bf16.safetensors",
9
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_avatar_720_quanto_bf16_int8.safetensors"
10
+ ]
11
+ }
12
+ }
defaults/hunyuan_custom.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model":
3
+ {
4
+ "name": "Hunyuan Video Custom 720p 13B",
5
+ "architecture" : "hunyuan_custom",
6
+ "description": "The Hunyuan Video Custom model is probably the best model to transfer people (only people for the moment) as it is quite good to keep their identity. However it is slow as to get good results, you need to generate 720p videos with 30 steps.",
7
+ "URLs": [
8
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_custom_720_bf16.safetensors",
9
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_custom_720_quanto_bf16_int8.safetensors"
10
+ ]
11
+ }
12
+ }
defaults/hunyuan_custom_audio.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model":
3
+ {
4
+ "name": "Hunyuan Video Custom Audio 720p 13B",
5
+ "architecture" : "hunyuan_custom_audio",
6
+ "description": "The Hunyuan Video Custom Audio model can be used to generate scenes of a person speaking given a Reference Image and a Recorded Voice or Song. The reference image is not a start image and therefore one can represent the person in a different context.The video length can be anything up to 10s. It is also quite good to generate no sound Video based on a person.",
7
+ "URLs": [
8
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_custom_audio_720_bf16.safetensors",
9
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_custom_audio_720_quanto_bf16_int8.safetensors"
10
+ ]
11
+ }
12
+ }
defaults/hunyuan_custom_edit.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model":
3
+ {
4
+ "name": "Hunyuan Video Custom Edit 720p 13B",
5
+ "architecture" : "hunyuan_custom_edit",
6
+ "description": "The Hunyuan Video Custom Edit model can be used to do Video inpainting on a person (add accessories or completely replace the person). You will need in any case to define a Video Mask which will indicate which area of the Video should be edited.",
7
+ "URLs": [
8
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_custom_edit_720_bf16.safetensors",
9
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_custom_edit_720_quanto_bf16_int8.safetensors"
10
+ ]
11
+ }
12
+ }
defaults/hunyuan_i2v.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model":
3
+ {
4
+ "name": "Hunyuan Video Image2video 720p 13B",
5
+ "architecture" : "hunyuan_i2v",
6
+ "description": "A good looking image 2 video model, but not so good in prompt adherence.",
7
+ "URLs": [
8
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_i2v_720_bf16v2.safetensors",
9
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_i2v_720_quanto_int8v2.safetensors"
10
+ ]
11
+ }
12
+ }