attong39 commited on
Commit
f523f14
·
verified ·
1 Parent(s): 483aeb5

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +2 -0
  2. Custom Resolutions Instructions.txt +16 -0
  3. Dockerfile +92 -0
  4. LICENSE.txt +46 -0
  5. README.md +304 -10
  6. defaults/ReadMe.txt +13 -0
  7. defaults/ace_step_v1.json +19 -0
  8. defaults/alpha.json +19 -0
  9. defaults/alpha2.json +19 -0
  10. defaults/alpha2_sf.json +18 -0
  11. defaults/alpha_sf.json +17 -0
  12. defaults/animate.json +17 -0
  13. defaults/chatterbox.json +18 -0
  14. defaults/chrono_edit.json +13 -0
  15. defaults/chrono_edit_distill.json +16 -0
  16. defaults/fantasy.json +11 -0
  17. defaults/flf2v_720p.json +16 -0
  18. defaults/flux.json +15 -0
  19. defaults/flux2_dev.json +16 -0
  20. defaults/flux2_dev_nvfp4.json +15 -0
  21. defaults/flux2_klein_4b.json +16 -0
  22. defaults/flux2_klein_9b.json +16 -0
  23. defaults/flux_chroma.json +17 -0
  24. defaults/flux_chroma_radiance.json +17 -0
  25. defaults/flux_dev_kontext.json +16 -0
  26. defaults/flux_dev_kontext_dreamomni2.json +19 -0
  27. defaults/flux_dev_umo.json +23 -0
  28. defaults/flux_dev_uso.json +16 -0
  29. defaults/flux_krea.json +15 -0
  30. defaults/flux_schnell.json +16 -0
  31. defaults/flux_srpo.json +14 -0
  32. defaults/flux_srpo_uso.json +16 -0
  33. defaults/fun_inp.json +13 -0
  34. defaults/fun_inp_1.3B.json +11 -0
  35. defaults/heartmula_oss_3b.json +14 -0
  36. defaults/heartmula_rl_oss_3b_20260123.json +15 -0
  37. defaults/hunyuan.json +12 -0
  38. defaults/hunyuan_1_5_480_i2v.json +17 -0
  39. defaults/hunyuan_1_5_480_i2v_step_distilled.json +18 -0
  40. defaults/hunyuan_1_5_480_t2v.json +16 -0
  41. defaults/hunyuan_1_5_480_t2v_lightx2v.json +17 -0
  42. defaults/hunyuan_1_5_i2v.json +17 -0
  43. defaults/hunyuan_1_5_t2v.json +16 -0
  44. defaults/hunyuan_1_5_upsampler.json +22 -0
  45. defaults/hunyuan_1_5_upsampler_1080.json +22 -0
  46. defaults/hunyuan_avatar.json +12 -0
  47. defaults/hunyuan_custom.json +12 -0
  48. defaults/hunyuan_custom_audio.json +12 -0
  49. defaults/hunyuan_custom_edit.json +12 -0
  50. defaults/hunyuan_i2v.json +12 -0
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ preprocessing/matanyone/tutorial_multi_targets.mp4 filter=lfs diff=lfs merge=lfs -text
37
+ preprocessing/matanyone/tutorial_single_target.mp4 filter=lfs diff=lfs merge=lfs -text
Custom Resolutions Instructions.txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ You can override the choice of Resolutions offered by WanGP, if you create a file "resolutions.json" in the main WanGP folder.
2
+ This file is composed of a list of 2-element sublists. Each 2-element sublist should have the format ["Label", "WxH"] where W, H are respectively the Width and Height of the resolution. Please make sure that W and H are multiples of 16. The letter "x" should be placed in between these two dimensions.
3
+
4
+ Here is below a sample "resolutions.json" file :
5
+
6
+ [
7
+ ["1280x720 (16:9, 720p)", "1280x720"],
8
+ ["720x1280 (9:16, 720p)", "720x1280"],
9
+ ["1024x1024 (1:1, 720p)", "1024x1024"],
10
+ ["1280x544 (21:9, 720p)", "1280x544"],
11
+ ["544x1280 (9:21, 720p)", "544x1280"],
12
+ ["1104x832 (4:3, 720p)", "1104x832"],
13
+ ["832x1104 (3:4, 720p)", "832x1104"],
14
+ ["960x960 (1:1, 720p)", "960x960"],
15
+ ["832x480 (16:9, 480p)", "832x480"]
16
+ ]
Dockerfile ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04
2
+
3
+ # Build arg for GPU architectures - specify which CUDA compute capabilities to compile for
4
+ # Common values:
5
+ # 7.0 - Tesla V100
6
+ # 7.5 - RTX 2060, 2070, 2080, Titan RTX
7
+ # 8.0 - A100, A800 (Ampere data center)
8
+ # 8.6 - RTX 3060, 3070, 3080, 3090 (Ampere consumer)
9
+ # 8.9 - RTX 4070, 4080, 4090 (Ada Lovelace)
10
+ # 9.0 - H100, H800 (Hopper data center)
11
+ # 12.0 - RTX 5070, 5080, 5090 (Blackwell) - Note: sm_120 architecture
12
+ #
13
+ # Examples:
14
+ # RTX 3060: --build-arg CUDA_ARCHITECTURES="8.6"
15
+ # RTX 4090: --build-arg CUDA_ARCHITECTURES="8.9"
16
+ # Multiple: --build-arg CUDA_ARCHITECTURES="8.0;8.6;8.9"
17
+ #
18
+ # Note: Including 8.9 or 9.0 may cause compilation issues on some setups
19
+ # Default includes 8.0 and 8.6 for broad Ampere compatibility
20
+ ARG CUDA_ARCHITECTURES="8.0;8.6"
21
+
22
+ ENV DEBIAN_FRONTEND=noninteractive
23
+
24
+ # Install system dependencies
25
+ RUN apt update && \
26
+ apt install -y \
27
+ python3 python3-pip git wget curl cmake ninja-build \
28
+ libgl1 libglib2.0-0 ffmpeg && \
29
+ apt clean
30
+
31
+ WORKDIR /workspace
32
+
33
+ COPY requirements.txt .
34
+
35
+ # Upgrade pip first
36
+ RUN pip install --upgrade pip setuptools wheel
37
+
38
+ # Install requirements if exists
39
+ RUN pip install -r requirements.txt
40
+
41
+ # Install PyTorch with CUDA support
42
+ RUN pip install --extra-index-url https://download.pytorch.org/whl/cu124 \
43
+ torch==2.6.0+cu124 torchvision==0.21.0+cu124
44
+
45
+ # Install SageAttention from git (patch GPU detection)
46
+ ENV TORCH_CUDA_ARCH_LIST="${CUDA_ARCHITECTURES}"
47
+ ENV FORCE_CUDA="1"
48
+ ENV MAX_JOBS="1"
49
+
50
+ COPY <<EOF /tmp/patch_setup.py
51
+ import os
52
+ with open('setup.py', 'r') as f:
53
+ content = f.read()
54
+
55
+ # Get architectures from environment variable
56
+ arch_list = os.environ.get('TORCH_CUDA_ARCH_LIST')
57
+ arch_set = '{' + ', '.join([f'"{arch}"' for arch in arch_list.split(';')]) + '}'
58
+
59
+ # Replace the GPU detection section
60
+ old_section = '''compute_capabilities = set()
61
+ device_count = torch.cuda.device_count()
62
+ for i in range(device_count):
63
+ major, minor = torch.cuda.get_device_capability(i)
64
+ if major < 8:
65
+ warnings.warn(f"skipping GPU {i} with compute capability {major}.{minor}")
66
+ continue
67
+ compute_capabilities.add(f"{major}.{minor}")'''
68
+
69
+ new_section = 'compute_capabilities = ' + arch_set + '''
70
+ print(f"Manually set compute capabilities: {compute_capabilities}")'''
71
+
72
+ content = content.replace(old_section, new_section)
73
+
74
+ with open('setup.py', 'w') as f:
75
+ f.write(content)
76
+ EOF
77
+
78
+ RUN git clone https://github.com/thu-ml/SageAttention.git /tmp/sageattention && \
79
+ cd /tmp/sageattention && \
80
+ python3 /tmp/patch_setup.py && \
81
+ pip install --no-build-isolation .
82
+
83
+ RUN useradd -u 1000 -ms /bin/bash user
84
+
85
+ RUN chown -R user:user /workspace
86
+
87
+ RUN mkdir /home/user/.cache && \
88
+ chown -R user:user /home/user/.cache
89
+
90
+ COPY entrypoint.sh /workspace/entrypoint.sh
91
+
92
+ ENTRYPOINT ["/workspace/entrypoint.sh"]
LICENSE.txt ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ WanGP NON-COMMERCIAL EVALUATION LICENSE 1.0
2
+
3
+ Definitions
4
+ 1.1 “Software” means the source code, binaries, libraries, utilities and UI released under this license.
5
+ 1.2 “Output” means images, videos or other media produced by running the Software.
6
+ 1.3 “Commercial Use” means:
7
+ a) selling, sublicensing, renting, leasing, or otherwise distributing the Software, in whole or in part, for a fee or other consideration; or
8
+ b) offering the Software (or any derivative) as part of a paid product or hosted service; or
9
+ c) using the Software (or any derivative) to provide cloud-based or backend services, where end users access or pay for those services.
10
+
11
+ License Grant
12
+ Subject to Section 3:
13
+ a) You are granted a worldwide, non-exclusive, royalty-free, revocable license to use, reproduce, modify and distribute the Software for non-commercial purposes only.
14
+ b) You are granted a worldwide, non-exclusive, royalty-free, irrevocable license to use, reproduce, modify and distribute the Output for any purpose, including commercial sale, provided that any commercial distribution of the Output includes a clear notice that the Output was produced (in whole or in part) using WanGP, along with a hyperlink to the WanGP application’s About tab or repository.
15
+
16
+ Restrictions
17
+ 3.1 You MAY NOT distribute, sublicense or otherwise make available the Software (or any derivative) for Commercial Use.
18
+ 3.2 You MAY sell, license or otherwise commercially exploit the Output without restriction.
19
+ 3.3 If you wish to use the Software for Commercial Use, you must obtain a separate commercial license from the Licensor.
20
+
21
+ Third-Party Components 4.1 The Software includes components licensed under various open-source licenses (e.g., Apache 2.0, MIT, BSD). 4.2 You must comply with all applicable terms of those third-party licenses, including preservation of copyright notices, inclusion of required license texts, and patent-grant provisions. 4.3 You can find the full text of each third-party license via the “About” tab in the WanGP application, which provides links to their original GitHub repositories.
22
+
23
+ Attribution
24
+ 5.1 You must give appropriate credit by including:
25
+ • a copy of this license (or a link to it), and
26
+ • a notice that your use is based on “WanGP”.
27
+ 5.2 You may do so in any reasonable manner, but not in any way that suggests the Licensor endorses you or your use.
28
+
29
+ Disclaimer of Warranty & Liability
30
+ THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED.
31
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE.
32
+
33
+ Commercial Licensing The Licensor may offer commercial licenses for the Software, which grant rights to use the Software for Commercial Use. Please contact [deepbeepmeep@yahoo.com] for terms and pricing.
34
+
35
+ Effective Date & Previous Versions
36
+ 8.1 This license is effective as of the date the LICENSE file is updated in the WanGP repository.
37
+ 8.2 Any copies of the Software obtained under prior license terms before this Effective Date remain governed by those prior terms; such granted rights are irrevocable.
38
+ 8.3 Use of the Software after the release of any subsequent version by the Licensor is subject to the terms of the then-current license, unless a separate agreement is in place.
39
+
40
+ Acceptable Use / Moral Clause
41
+ 9.1 You MAY NOT use the Software or the Output to facilitate or produce content that is illegal, harmful, violent, harassing, defamatory, fraudulent, or otherwise violates applicable laws or fundamental human rights.
42
+ 9.2 You MAY NOT deploy the Software or Output in contexts that promote hate speech, extremist ideology, human rights abuses, or other actions that could foreseeably cause significant harm to individuals or groups.
43
+ 9.3 The Licensor reserves the right to terminate the rights granted under this license if a licensee materially breaches this Acceptable Use clause.
44
+
45
+ END OF LICENSE
46
+
README.md CHANGED
@@ -1,10 +1,304 @@
1
- ---
2
- title: Wan2GP
3
- emoji:
4
- colorFrom: gray
5
- colorTo: red
6
- sdk: docker
7
- pinned: false
8
- ---
9
-
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Wan2GP
3
+ emoji: 🌖
4
+ colorFrom: indigo
5
+ colorTo: red
6
+ sdk: docker
7
+ app_port: 7860
8
+ ---
9
+
10
+ # WanGP
11
+
12
+ -----
13
+ <p align="center">
14
+ <b>WanGP by DeepBeepMeep : The best Open Source Video Generative Models Accessible to the GPU Poor</b>
15
+ </p>
16
+
17
+ WanGP supports the Wan (and derived models) but also Hunyuan Video, Flux, Qwen, Z-Image, LongCat, Kandinsky, LTX 1 & 2, Qwen3 TTS, Chatterbox, HearMula, ... with:
18
+ - Low VRAM requirements (as low as 6 GB of VRAM is sufficient for certain models)
19
+ - Support for old Nvidia GPUs (RTX 10XX, 20xx, ...)
20
+ - Support for AMD GPUs Radeon RX 76XX, 77XX, 78XX & 79XX, instructions in the Installation Section Below.
21
+ - Very Fast on the latest GPUs
22
+ - Easy to use Full Web based interface
23
+ - Support for many checkpoint Quantized formats: int8, fp8, gguf, NV FP4, Nunchaku
24
+ - Auto download of the required model adapted to your specific architecture
25
+ - Tools integrated to facilitate Video Generation : Mask Editor, Prompt Enhancer, Temporal and Spatial Generation, MMAudio, Video Browser, Pose / Depth / Flow extractor, Motion Designer
26
+ - Plenty of ready to use Plug Ins: Gallery Browser, Upscaler, Models/Checkpoints Manager, CivitAI browser and downloader, ...
27
+ - Loras Support to customize each model
28
+ - Queuing system : make your shopping list of videos to generate and come back later
29
+ - Headless mode: launch the generation of multiple image / videos / audio files using a command line
30
+
31
+ **Discord Server to get Help from the WanGP Community and show your Best Gens:** https://discord.gg/g7efUW9jGV
32
+
33
+ **Follow DeepBeepMeep on Twitter/X to get the Latest News**: https://x.com/deepbeepmeep
34
+
35
+ ## 📋 Table of Contents
36
+
37
+ - [🚀 Quick Start](#-quick-start)
38
+ - [📦 Installation](#-installation)
39
+ - [🎯 Usage](#-usage)
40
+ - [📚 Documentation](#-documentation)
41
+ - [🔗 Related Projects](#-related-projects)
42
+
43
+
44
+ ## 🔥 Latest Updates :
45
+
46
+ ### January 29th 2026: WanGP v10.56, Music for your Hearts
47
+
48
+ WanGP Special *TTS* (Text To Speech) Release:
49
+
50
+ - **Heart Mula**: *Suno* quality song with lyrics on your local PC. You can generate up to 4 min of music.
51
+
52
+ - **Ace Step v1**: while waiting for *Ace Step v1.5* (which should be released very soon), enjoy this oldie (2025!) but goodie song generator as an appetizer. Ace Step v1 is a very fast Song generator. It is *Diffusion* based, so don't hesitate to turn on Profile 4 to go as low as 4 GB of VRAM while remaining fast.
53
+
54
+ - **Qwen 3 TTS**: you can either do *Voice Cloning*, *Generate a Custom Voice based on a Prompt* or use a *Predefined Voice*
55
+
56
+ - **TTS Features**:
57
+ - **Early stop** : you can abort a gen, while still keeping what has been generated (will work only for TTS models which are *Autoregressive Models*, no need to ask that for Image/Video gens which are *Diffusion Models*)
58
+ - **Specialized Prompt Enhancers**: if you enter the prompt in Heart Mula *"a song about AI generation"*, *WanGP Prompt Enhancer* will generate the corresponding masterpiece for you. Likewise you can enhance "A speech about AI generation" when using Qwen3 TTS or ChatterBox.
59
+ - **Custom Output folder for Audio Gens**: you can now choose a different folder for the *Audio Outputs*
60
+ - **Default Memory Profile for Audio Models**: TTS models can get very slow if you use profile 4 (being autoregressive models, they will need to load all the layers one by one to generate one single audio token then rinse & repeat). On the other hand, they don't need as much VRAM, so you can now define a more aggressive profile (3+ for instance)
61
+
62
+ - **Z Image Base**: try it if you are into the *Z Image* hype but it will be probably useless for you unless you are a researcher and / or want to build a finetune out of it. This model requires from 35 to 50 steps (4x to 6x slower than *Z Image turbo*) and cfg > 1 (an additional 2x slower) and there is no *Reinforcement Learning* so Output Images wont be as good. The plus side is a higher diversity and *Native Negative Prompt* (versus Z Image virtual Negative Prompt using *NAG*).
63
+
64
+ Note that Z Image Base is very sensitive to the *Attention Mode*: it is not compatible with *Sage 1* as it produces black frames. So I have disabled Sage for RTX 30xx. Also there are reports it produces some vertical banding artifacts with *Sage 2*
65
+
66
+ - **Flux 1/2 NAG** : *Flux 2 Klein* is your new best friend but you miss *Negative Prompts*, *NAG* support for Distilled models will make you best buddies forever as NAG simulates Negative prompts.
67
+
68
+ - **Various Improvements**:
69
+ - Video /Audio Galleries now support deletions of gens done outside WanGP
70
+ - added *MP3 support* for audio outputs
71
+ - *Check for Updates* button for *Plugins* to see in a glance if any of your plugin can be updated
72
+ - *Prompt Enhancer* generates a different enhanced prompt each time you click on it. You can define in the config tab its gen parameters (top k, temperature)
73
+ - New *Root Loras* folder can be defined in the config Tab. Useful if you have multiple WanGP instances or want to store easily all your loras in a different hard drive
74
+ - added new setting *Attention Mode Override* in the *Misc* tab
75
+ - Experimental: allowed changing *Configuration* during a *Generation*
76
+
77
+ *update 10.51*: new Heart Mula Finetune better at following instructions, Extra settings (cfg, top k) for TTS models, Rife v4\
78
+ *update 10.52*: updated plugin list and added version tracking\
79
+ *update 10.53*: video/audio galleries now support deletions\
80
+ *update 10.54*: added Z Image Base, prompt enhancers improvements, configurable loras root folder\
81
+ *update 10.55*: blocked Sage with Z Image on RTX30xx and added override attention mode settings, allowed changing config during generation\
82
+ *update 10.56*: added NAG for Flux 1/2 & Ace Step v1
83
+
84
+ ### January 20th 2026: WanGP v10.43, The Cost Saver
85
+ *GPUs are expensive, RAM is expensive, SSDs are expensive; sadly we now live in a GPU & RAM poor world.*
86
+
87
+ WanGP comes again to the rescue:
88
+
89
+ - **GGUF support**: as some of you know, I am not a big fan of this format because when used with image / video generative models we don't get any speed boost (matrices multiplications are still done at 16 bits), VRAM savings are small and quality is worse than with int8/fp8. Still gguf has one advantage: it consumes less RAM and harddrive space. So enjoy gguf support. I have added ready to use *Kijai gguf finetunes* for *LTX 2*.
90
+
91
+ - **Models Manager PlugIn**: use this *Plugin* to identify how much space is taken by each *model* / *finetune* and delete the ones you no longer use. Try to avoid deleting shared files otherwise they will be downloaded again.
92
+
93
+ - **LTX 2 Dual Video & Audio Control**: you no longer need to extract the audio track of a *Control Video* if you want to use it as well to drive the video generation. New mode will allow you to use both motion and audio from Video Control.
94
+
95
+ - **LTX 2 - Custom VAE URL**: some users have asked if they could use the old *Distiller VAE* instead of the new one. To do that, create a *finetune* def based on an existing model definition and save it in the *finetunes/* folder with this entry (check the *docs/FINETUNES.md* doc):
96
+ ```
97
+ "VAE_URLs": ["https://huggingface.co/DeepBeepMeep/LTX-2/resolve/main/ltx-2-19b_vae_old.safetensors"]
98
+ ```
99
+
100
+ - **Flux 2 Klein 4B & 9B**: try these distilled models as fast as Z_Image if not faster but with out of the box image editing capabilities
101
+
102
+ - **Flux 2 & Qwen Outpainting + Lanpaint**: the inpaint mode of these models support now *outpainting* + more combination possible with *Lanpaint*
103
+
104
+ - **RAM Optimizations for multi minutes Videos**: processing, saving, spatial & Temporal upsampling very long videos should require much less RAM.
105
+
106
+ - **Text Encoder Cache**: if you are asking a Text prompt already used recently with the current model, it will be taken straight from a cache. The cache is optimized to consume little RAM. It wont work with certain models such as Qwen where the Text Prompt is combined internally with an Image.
107
+
108
+ *update 10.41*: added Flux 2 klein\
109
+ *update 10.42*: added RAM optimizations & Text Encoder Cache\
110
+ *update 10.43*: added outpainting for Qwen & Flux 2, Lanpaint for Flux 2
111
+
112
+ ### January 15th 2026: WanGP v10.30, The Need for Speed ...
113
+
114
+ - **LTX Distilled VAE Upgrade**: *Kijai* has observed that the Distilled VAE produces images that were less sharp than those of the VAE of the Non Distilled model. I have used this as an opportunity to repackage all the LTX 2 checkpoints and reduce their overall HD footprint since they all share around 5GB.
115
+
116
+ **So dont be surprised if the old checkpoints are deleted and new are downloaded !!!**.
117
+
118
+ - **LTX2 Multi Passes Loras multipliers**: *LTX2* now supports loras multipliers that depend on the Pass No. For instance "1;0.5" means 1 will be the strength for the first LTX2 pass and 0.5 will be the strength for the second pass.
119
+
120
+ - **New Profile 3.5**: here is the lost kid of *Profile 3* & *Profile 5*, you got tons of VRAM, but little RAM ? Profile 3.5 will be your new friend as it will no longer use Reserved RAM to accelerate transfers. Use Profile 3.5 only if you can fit entirely a *Diffusion / Transformer* model in VRAM, otherwise the gen may be much slower.
121
+
122
+ - **NVFP4 Quantization for LTX 2 & Flux 2**: you will now be able to load *NV FP4* model checkpoints in WanGP. On top of *Wan NV4* which was added recently, we now have *LTX 2 (non distilled)* & *Flux 2* support. NV FP4 uses slightly less VRAM and up to 30% less RAM.
123
+
124
+ To enjoy fully the NV FP4 checkpoints (**at least 30% faster gens**), you will need a RTX 50xx and to upgrade to *Pytorch 2.9.1 / Cuda 13* with the latest version of *lightx2v kernels* (check *docs/INSTALLATION.md*). To observe the speed gain, you have to make sure the workload is quite high (high res, long video).
125
+
126
+
127
+ ### January 13th 2026: WanGP v10.24, When there is no VRAM left there is still some VRAM left ...
128
+
129
+ - **LTX 2 - SUPER VRAM OPTIMIZATIONS**
130
+
131
+ *With WanGP 10.21, HD 720p Video Gens of 10s now need just 8GB of VRAM!*
132
+
133
+ LTX Team said this video gen was for 4k. So I had no choice but to squeeze more VRAM with further optimizations.
134
+
135
+ After much suffering I have managed to reduce by at least 1/3 the VRAM requirements of LTX 2, which means:
136
+ - 10s at 720p can be done with only 8GB of VRAM
137
+ - 10s at 1080p with only 12 GB of VRAM
138
+ - 20s at 1080p with only 16 GB of VRAM
139
+ - 10s at Full 4k (3840 x 2176 !!!) with 24 GB of VRAM. However the bad news is LTX 2 video is not for 4K, as 4K outputs may give you nightmares ...
140
+
141
+ 3K/4K resolutions will be available only if you enable them in the *Config* / *General* tab.
142
+
143
+ - **Ic Loras support**: Use a *Control Video* to transfer *Pose*, *Depth*, *Canny Edges*. I have added some extra tweaks: with WanGP you can restrict the transfer to a *masked area*, define a *denoising strength* (how much the control video is going to be followed) and a *masking strength* (how much unmasked area is impacted)
144
+
145
+ - **Start Image Strength**: This new slider will appear below a *Start Image* or Source *Video*. If you set it to values lower than 1, you may reduce the static image effect you sometimes get with LTX2 i2v
146
+
147
+ - **Custom Gemma Text Encoder for LTX 2**: As a practical case, the *Heretic* text encoder is now supported by WanGP. Check the *finetune* doc, but in short create a *finetune* that has a *text_encoder_URLS* key that contains a list of one or more file paths or URLs.
148
+
149
+ - **Experimental Auto Recovery Failed Lora Pin**: Some users (usually with PCs with less than 64 GB of RAM) have reported Out Of Memory although a model seemed to load just fine when starting a gen with Loras. This is sometimes related to WanGP attempting (and failing due to insufficient reserved RAM) to pin the Loras to Reserved Memory for faster gen. I have experimented with a recovery mode that should release sufficient resources to continue the Video Gen. This may solve the oom crashes with *LTX2 Default (non distilled)*
150
+
151
+ - **Max Loras Pinned Slider**: If the Auto Recovery Mode is still not sufficient, I have added a Slider at the bottom of the *Configuration* / *Performance* tab that you can use to prevent WanGP from Pinning Loras (to do so set it to 0). As if there is no loading attempt there won't be any crash...
152
+
153
+ *update 10.21*: added slider Loras Max Pinning slider\
154
+ *update 10.22*: added support for custom Ltx2 Text Encoder + Auto Recovery mode if Lora Pinning failed\
155
+ *update 10.23*: Fixed text prompt ignore in profile 1 & 2 (this created random output videos)
156
+
157
+ ### January 9th 2026: WanGP v10.11, Spoiled again
158
+
159
+ - **LTX 2**: here is the long awaited *Ovi Challenger*, LTX-2 generates video and an audio soundtrack. As usual this WanGP version is *low VRAM*. You should be able to run it with as low as 10 GB of VRAM. If you have at least 24 GB of VRAM you will be able to generate 20s at 720p in a single window in only 2 minutes with the distilled model. WanGP LTX 2 version supports on day one, *Start/End keyframes*, *Sliding-Window* / *Video Continuation* and *Generation Preview*. A *LTX 2 distilled* is part of the package for a very fast generation.
160
+
161
+ With WanGP v10.11 you can now force your soundtrack, it works like *Multitalk* / *Avatar* except in theory it should work with any kind of sound (not just vocals). Thanks to *Kijai* for showing it was possible.
162
+
163
+ - **Z Image Twin Folder Turbo**: Z Image even faster as this variant can generate images with as little as 1 step (3 steps recommended)
164
+
165
+ - **Qwen LanPaint**: very precise *In Painting*, offers a better integration of the inpainted area in the rest of the image. Beware it is up to 5x slower as it "searches" for the best replacement.
166
+
167
+ - **Optimized Pytorch Compiler** : *Patience is the Mother of Virtue*. Finally I may (or may not) have fixed the PyTorch compiler with the Wan models. It should work in much diverse situations and takes much less time.
168
+
169
+ - **LongCat Video**: experimental support which includes *LongCat Avatar*, a talking head model. For the moment it is mostly for model collectors as it is very slow. It needs 40+ steps and each step contains up to 3 passes.
170
+
171
+ - **MMaudio NSFW**: for alternative audio background
172
+
173
+ *update v10.11*: LTX 2, use your own soundtrack
174
+
175
+
176
+
177
+
178
+ See full changelog: **[Changelog](docs/CHANGELOG.md)**
179
+
180
+
181
+ ## 🚀 Quick Start
182
+
183
+ **One-click installation:**
184
+ Get started instantly with [Pinokio App](https://pinokio.computer/)\
185
+ It is recommended to use in Pinokio the Community Scripts *wan2gp* or *wan2gp-amd* by **Morpheus** rather than the official Pinokio install.
186
+
187
+
188
+ **Manual installation:**
189
+ ```bash
190
+ git clone https://github.com/deepbeepmeep/Wan2GP.git
191
+ cd Wan2GP
192
+ conda create -n wan2gp python=3.10.9
193
+ conda activate wan2gp
194
+ pip install torch==2.7.1 torchvision torchaudio --index-url https://download.pytorch.org/whl/test/cu128
195
+ pip install -r requirements.txt
196
+ ```
197
+
198
+ **Run the application:**
199
+ ```bash
200
+ python wgp.py
201
+ ```
202
+
203
+ First time using WanGP ? Just check the *Guides* tab, and you will find a selection of recommended models to use.
204
+
205
+ **Update the application:**
206
+ If using Pinokio use Pinokio to update otherwise:
207
+ Get in the directory where WanGP is installed and:
208
+ ```bash
209
+ git pull
210
+ conda activate wan2gp
211
+ pip install -r requirements.txt
212
+ ```
213
+
214
+ if you get some error messages related to git, you may try the following (beware this will overwrite local changes made to the source code of WanGP):
215
+ ```bash
216
+ git fetch origin && git reset --hard origin/main
217
+ conda activate wan2gp
218
+ pip install -r requirements.txt
219
+ ```
220
+
221
+ **Run headless (batch processing):**
222
+
223
+ Process saved queues without launching the web UI:
224
+ ```bash
225
+ # Process a saved queue
226
+ python wgp.py --process my_queue.zip
227
+ ```
228
+ Create your queue in the web UI, save it with "Save Queue", then process it headless. See [CLI Documentation](docs/CLI.md) for details.
229
+
230
+ ## 🐳 Docker:
231
+
232
+ **For Debian-based systems (Ubuntu, Debian, etc.):**
233
+
234
+ ```bash
235
+ ./run-docker-cuda-deb.sh
236
+ ```
237
+
238
+ This automated script will:
239
+
240
+ - Detect your GPU model and VRAM automatically
241
+ - Select optimal CUDA architecture for your GPU
242
+ - Install NVIDIA Docker runtime if needed
243
+ - Build a Docker image with all dependencies
244
+ - Run WanGP with optimal settings for your hardware
245
+
246
+ **Docker environment includes:**
247
+
248
+ - NVIDIA CUDA 12.4.1 with cuDNN support
249
+ - PyTorch 2.6.0 with CUDA 12.4 support
250
+ - SageAttention compiled for your specific GPU architecture
251
+ - Optimized environment variables for performance (TF32, threading, etc.)
252
+ - Automatic cache directory mounting for faster subsequent runs
253
+ - Current directory mounted in container - all downloaded models, loras, generated videos and files are saved locally
254
+
255
+ **Supported GPUs:** RTX 40XX, RTX 30XX, RTX 20XX, GTX 16XX, GTX 10XX, Tesla V100, A100, H100, and more.
256
+
257
+ ## 📦 Installation
258
+
259
+ ### Nvidia
260
+ For detailed installation instructions for different GPU generations:
261
+ - **[Installation Guide](docs/INSTALLATION.md)** - Complete setup instructions for RTX 10XX to RTX 50XX
262
+
263
+ ### AMD
264
+ For detailed installation instructions for different GPU generations:
265
+ - **[Installation Guide](docs/AMD-INSTALLATION.md)** - Complete setup instructions for Radeon RX 76XX, 77XX, 78XX & 79XX
266
+
267
+ ## 🎯 Usage
268
+
269
+ ### Basic Usage
270
+ - **[Getting Started Guide](docs/GETTING_STARTED.md)** - First steps and basic usage
271
+ - **[Models Overview](docs/MODELS.md)** - Available models and their capabilities
272
+
273
+ ### Advanced Features
274
+ - **[Loras Guide](docs/LORAS.md)** - Using and managing Loras for customization
275
+ - **[Finetunes](docs/FINETUNES.md)** - Add manually new models to WanGP
276
+ - **[VACE ControlNet](docs/VACE.md)** - Advanced video control and manipulation
277
+ - **[Command Line Reference](docs/CLI.md)** - All available command line options
278
+
279
+ ## 📚 Documentation
280
+
281
+ - **[Changelog](docs/CHANGELOG.md)** - Latest updates and version history
282
+ - **[Troubleshooting](docs/TROUBLESHOOTING.md)** - Common issues and solutions
283
+
284
+ ## 📚 Video Guides
285
+ - Nice Video that explain how to use Vace:\
286
+ https://www.youtube.com/watch?v=FMo9oN2EAvE
287
+ - Another Vace guide:\
288
+ https://www.youtube.com/watch?v=T5jNiEhf9xk
289
+
290
+ ## 🔗 Related Projects
291
+
292
+ ### Other Models for the GPU Poor
293
+ - **[HuanyuanVideoGP](https://github.com/deepbeepmeep/HunyuanVideoGP)** - One of the best open source Text to Video generators
294
+ - **[Hunyuan3D-2GP](https://github.com/deepbeepmeep/Hunyuan3D-2GP)** - Image to 3D and text to 3D tool
295
+ - **[FluxFillGP](https://github.com/deepbeepmeep/FluxFillGP)** - Inpainting/outpainting tools based on Flux
296
+ - **[Cosmos1GP](https://github.com/deepbeepmeep/Cosmos1GP)** - Text to world generator and image/video to world
297
+ - **[OminiControlGP](https://github.com/deepbeepmeep/OminiControlGP)** - Flux-derived application for object transfer
298
+ - **[YuE GP](https://github.com/deepbeepmeep/YuEGP)** - Song generator with instruments and singer's voice
299
+
300
+ ---
301
+
302
+ <p align="center">
303
+ Made with ❤️ by DeepBeepMeep
304
+ </p>
defaults/ReadMe.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Please do not modify any file in this Folder.
2
+
3
+ If you want to change a property of a default model, copy the corresponding model file in the ./finetunes folder and modify the properties you want to change in the new file.
4
+ If a property is not in the new file, it will be inherited automatically from the default file that matches the same name file.
5
+
6
+ For instance to hide a model:
7
+
8
+ {
9
+ "model":
10
+ {
11
+ "visible": false
12
+ }
13
+ }
defaults/ace_step_v1.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "TTS ACE-Step v1 3.5B",
4
+ "architecture": "ace_step_v1",
5
+ "description": "ACE-Step, a fast open-source foundation diffusion based model for music generation that overcomes key limitations of existing approaches and achieves state-of-the-art performance.",
6
+ "URLs": [
7
+ "https://huggingface.co/DeepBeepMeep/TTS/resolve/main/ace_step_v1_transformer_bf16.safetensors",
8
+ "https://huggingface.co/DeepBeepMeep/TTS/resolve/main/ace_step_v1_transformer_quanto_bf16_int8.safetensors"
9
+ ]
10
+ },
11
+ "prompt": "[Verse]\nNeon rain on the city line\nYou hum the tune and I fall in time\n[Chorus]\nHold me close and keep the time",
12
+ "alt_prompt": "Dreamy synth-pop with shimmering pads, soft vocals, and a slow dance groove.",
13
+ "audio_prompt_type": "",
14
+ "audio_scale": 0.5,
15
+ "duration_seconds": 20,
16
+ "num_inference_steps": 60,
17
+ "guidance_scale": 7.0,
18
+ "scheduler_type": "euler"
19
+ }
defaults/alpha.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model":
3
+ {
4
+ "name": "Wan2.1 Alpha v1.0 14B",
5
+ "architecture" : "alpha",
6
+ "description": "This model successfully generates various scenes with accurate and clearly rendered transparency. Notably, it can synthesize diverse semi-transparent objects, glowing effects, and fine-grained details such as hair. For each video generated you will find a Zip file with the same name that will contain the corresponding RGBA images.",
7
+ "URLs": "t2v",
8
+ "preload_URLs": [
9
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan_alpha_2.1_vae_rgb_channel.safetensors",
10
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan_alpha_2.1_vae_alpha_channel.safetensors"
11
+ ],
12
+ "loras": [
13
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan_alpha_2.1_dora.safetensors"
14
+ ],
15
+ "loras_multipliers": [ 1 ]
16
+ },
17
+ "prompt": "A large orange octopus is seen resting. The background of the video is transparent."
18
+
19
+ }
defaults/alpha2.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model":
3
+ {
4
+ "name": "Wan2.1 Alpha v2.0 14B",
5
+ "architecture" : "alpha2",
6
+ "description": "Wan-Alpha v2.0 generates transparent videos with fine-grained alpha detail (hair, glow, smoke). For each video, a Zip file with RGBA frames is produced.",
7
+ "URLs": "t2v",
8
+ "preload_URLs": [
9
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan_alpha_2.1_vae_rgb_channel_v2.safetensors",
10
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan_alpha_2.1_vae_alpha_channel_v2.safetensors",
11
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/gauss_mask"
12
+ ],
13
+ "loras": [
14
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan_alpha_2.1_dora_v2.safetensors"
15
+ ],
16
+ "loras_multipliers": [ 1 ]
17
+ },
18
+ "prompt": "A large orange octopus is seen resting. The background of the video is transparent."
19
+ }
defaults/alpha2_sf.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model":
3
+ {
4
+ "name": "Wan2.1 Alpha v2.0 Lightning 14B",
5
+ "architecture" : "alpha2",
6
+ "description": "Wan-Alpha v2.0 Lightning with transparent video output and RGBA frames zip.",
7
+ "URLs": "t2v_sf",
8
+ "preload_URLs": "alpha2",
9
+ "loras": "alpha2",
10
+ "loras_multipliers": [ 1 ],
11
+ "profiles_dir" : [""]
12
+ },
13
+ "prompt": "A large orange octopus is seen resting. The background of the video is transparent.",
14
+ "num_inference_steps": 4,
15
+ "guidance_scale": 1,
16
+ "flow_shift": 3
17
+ }
18
+
defaults/alpha_sf.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model":
3
+ {
4
+ "name": "Wan2.1 Alpha v1.0 Lightning 14B",
5
+ "architecture" : "alpha",
6
+ "description": "This model is accelerated by the Lightning / SelfForcing process. It successfully generates various scenes with accurate and clearly rendered transparency. Notably, it can synthesize diverse semi-transparent objects, glowing effects, and fine-grained details such as hair. For each video generated you will find a Zip file with the same name that will contain the corresponding RGBA images.",
7
+ "URLs": "t2v_sf",
8
+ "preload_URLs": "alpha",
9
+ "loras": "alpha",
10
+ "loras_multipliers": [ 1 ],
11
+ "profiles_dir" : [""]
12
+ },
13
+ "prompt": "A large orange octopus is seen resting. The background of the video is transparent.",
14
+ "num_inference_steps": 4,
15
+ "guidance_scale": 1,
16
+ "flow_shift": 3
17
+ }
defaults/animate.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Wan2.2 Animate 14B",
4
+ "architecture": "animate",
5
+ "description": "Wan-Animate takes a video and a character image as input, and generates a video in either 'Animation' or 'Replacement' mode. Sliding Windows of at least 81 frames are recommended to obtain the best Style continuity.",
6
+ "URLs": [
7
+ "https://huggingface.co/DeepBeepMeep/Wan2.2/resolve/main/wan2.2_animate_14B_bf16.safetensors",
8
+ "https://huggingface.co/DeepBeepMeep/Wan2.2/resolve/main/wan2.2_animate_14B_quanto_fp16_int8.safetensors",
9
+ "https://huggingface.co/DeepBeepMeep/Wan2.2/resolve/main/wan2.2_animate_14B_quanto_bf16_int8.safetensors"
10
+ ],
11
+ "preload_URLs" :
12
+ [
13
+ "https://huggingface.co/DeepBeepMeep/Wan2.2/resolve/main/wan2.2_animate_relighting_lora.safetensors"
14
+ ],
15
+ "group": "wan2_2"
16
+ }
17
+ }
defaults/chatterbox.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "TTS Chatterbox Multilingual",
4
+ "architecture": "chatterbox",
5
+ "description": "Resemble AI's open multilingual TTS with language selection via model mode.",
6
+ "URLs": []
7
+ },
8
+ "prompt": "Welcome to Chatterbox !",
9
+ "negative_prompt": "",
10
+ "audio_prompt_type": "A",
11
+ "model_mode": "en",
12
+ "repeat_generation": 1,
13
+ "video_length": 0,
14
+ "num_inference_steps": 0,
15
+ "pace": 0.5,
16
+ "exaggeration": 0.5,
17
+ "temperature": 0.8
18
+ }
defaults/chrono_edit.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Wan2.1 Chrono Edit 14B",
4
+ "architecture": "chrono_edit",
5
+ "description": "This model is an Image Editor that will follow your instructions. It generates internally a video to produce the desired effect on the original image (the result being the End Image). It expects a very specific prompt format. That is why you must absolutely use the Prompt Enhancer that has been tuned for this model.",
6
+ "URLs": [
7
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_chrono_edit_14B_mbf16.safetensors",
8
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_chrono_edit_14B_quanto_mbf16_int8.safetensors",
9
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_chrono_edit_14B_quanto_mfp16_int8.safetensors"
10
+ ]
11
+ },
12
+ "prompt": "Rotate the pose of the woman so that she is facing the right"
13
+ }
defaults/chrono_edit_distill.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Wan2.1 Chrono Edit Distill 14B",
4
+ "architecture": "chrono_edit",
5
+ "description": "This model is an Image Editor that will follow your instructions. It generates internally a video to produce the desired effect on the original image (the result being the End Image). It expects a very specific prompt format. That is why you must absolutely use the Prompt Enhancer that has been tuned for this model. This version is accelerated using the Chrono Distill process.",
6
+ "URLs": "chrono_edit",
7
+ "profiles_dir": [""],
8
+ "loras": ["https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/loras_accelerators/chronoedit_distill_lora.safetensors"],
9
+ "loras_multipliers": [1]
10
+ },
11
+ "prompt": "Rotate the pose of the woman so that she is facing the right",
12
+ "num_inference_steps": 8,
13
+ "flow_shift": 2,
14
+ "guidance_phases": 1,
15
+ "guidance_scale": 1
16
+ }
defaults/fantasy.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model":
3
+ {
4
+ "name": "Fantasy Talking 720p 14B",
5
+ "architecture" : "fantasy",
6
+ "modules": [ ["https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_fantasy_speaking_14B_bf16.safetensors"]],
7
+ "description": "The Fantasy Talking model corresponds to the original Wan image 2 video model combined with the Fantasy Speaking module to process an audio Input.",
8
+ "URLs": "i2v_720p"
9
+ },
10
+ "resolution": "1280x720"
11
+ }
defaults/flf2v_720p.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model":
3
+ {
4
+ "name": "First Last Frame to Video 720p (FLF2V) 14B",
5
+ "architecture" : "flf2v_720p",
6
+ "visible" : true,
7
+ "description": "The First Last Frame 2 Video model is the official Image 2 Video model that supports Start and End frames.",
8
+ "URLs": [
9
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_FLF2V_720p_14B_mbf16.safetensors",
10
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_FLF2V_720p_14B_quanto_mbf16_int8.safetensors",
11
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_FLF2V_720p_14B_quanto_mfp16_int8.safetensors"
12
+ ],
13
+ "auto_quantize": true
14
+ },
15
+ "resolution": "1280x720"
16
+ }
defaults/flux.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Flux 1 Dev 12B",
4
+ "architecture": "flux",
5
+ "description": "FLUX.1 Dev is a 12 billion parameter rectified flow transformer capable of generating images from text descriptions.",
6
+ "URLs": [
7
+ "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-dev_bf16.safetensors",
8
+ "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-dev_quanto_bf16_int8.safetensors"
9
+ ],
10
+ "image_outputs": true
11
+ },
12
+ "prompt": "draw a hat",
13
+ "resolution": "1280x720",
14
+ "batch_size": 1
15
+ }
defaults/flux2_dev.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Flux 2 Dev 32B",
4
+ "architecture": "flux2_dev",
5
+ "description": "FLUX.2 Dev is the latest rectified flow transformer from Black Forest Labs for image generation and editing.",
6
+ "URLs": [
7
+ "https://huggingface.co/DeepBeepMeep/Flux2/resolve/main/flux2-dev.safetensors",
8
+ "https://huggingface.co/DeepBeepMeep/Flux2/resolve/main/flux2-dev_quanto_bf16_int8.safetensors"
9
+ ]
10
+ },
11
+ "prompt": "draw a hat on top of a hat inside a hat",
12
+ "resolution": "1024x1024",
13
+ "batch_size": 1,
14
+ "embedded_guidance_scale": 4,
15
+ "sampling_steps": 30
16
+ }
defaults/flux2_dev_nvfp4.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Flux 2 Dev NVFP4 32B",
4
+ "architecture": "flux2_dev",
5
+ "description": "NVFP4-quantized Flux 2 Dev checkpoint (mixed).",
6
+ "URLs": [
7
+ "https://huggingface.co/DeepBeepMeep/Flux2/resolve/main/flux2-dev-nvfp4-mixed.safetensors"
8
+ ]
9
+ },
10
+ "prompt": "draw a hat on top of a hat inside a hat",
11
+ "resolution": "1024x1024",
12
+ "batch_size": 1,
13
+ "embedded_guidance_scale": 4,
14
+ "sampling_steps": 30
15
+ }
defaults/flux2_klein_4b.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Flux 2 Klein 4B",
4
+ "architecture": "flux2_klein_4b",
5
+ "description": "FLUX.2 Klein 4B is a compact rectified flow transformer for image generation and editing.",
6
+ "URLs": [
7
+ "https://huggingface.co/DeepBeepMeep/Flux2/resolve/main/flux-2-klein-4b.safetensors",
8
+ "https://huggingface.co/DeepBeepMeep/Flux2/resolve/main/flux-2-klein-4b_quanto_bf16_int8.safetensors"
9
+ ]
10
+ },
11
+ "prompt": "a cozy reading nook with warm sunlight, soft textiles, and a cup of tea on a wooden side table",
12
+ "resolution": "1024x1024",
13
+ "batch_size": 1,
14
+ "embedded_guidance_scale": 1,
15
+ "num_inference_steps": 4
16
+ }
defaults/flux2_klein_9b.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Flux 2 Klein 9B",
4
+ "architecture": "flux2_klein_9b",
5
+ "description": "FLUX.2 Klein 9B is a balanced rectified flow transformer for image generation and editing.",
6
+ "URLs": [
7
+ "https://huggingface.co/DeepBeepMeep/Flux2/resolve/main/flux-2-klein-9b.safetensors",
8
+ "https://huggingface.co/DeepBeepMeep/Flux2/resolve/main/flux-2-klein-9b_quanto_bf16_int8.safetensors"
9
+ ]
10
+ },
11
+ "prompt": "a glass greenhouse filled with lush tropical plants, misty air, and dappled light",
12
+ "resolution": "1024x1024",
13
+ "batch_size": 1,
14
+ "embedded_guidance_scale": 1,
15
+ "num_inference_steps": 4
16
+ }
defaults/flux_chroma.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Flux 1 Chroma 1 HD 8.9B",
4
+ "architecture": "flux_chroma",
5
+ "description": "FLUX.1 Chroma is a 8.9 billion parameters model. As a base model, Chroma1 is intentionally designed to be an excellent starting point for finetuning. It provides a strong, neutral foundation for developers, researchers, and artists to create specialized models.",
6
+ "URLs": [
7
+ "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-chroma_hd_bf16.safetensors",
8
+ "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-chroma_hd_quanto_bf16_int8.safetensors"
9
+ ],
10
+ "image_outputs": true
11
+ },
12
+ "prompt": "draw a hat",
13
+ "resolution": "1280x720",
14
+ "guidance_scale": 3.0,
15
+ "num_inference_steps": 20,
16
+ "batch_size": 1
17
+ }
defaults/flux_chroma_radiance.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Flux 1 Chroma Radiance 8.9B",
4
+ "architecture": "flux_chroma_radiance",
5
+ "description": "FLUX.1 Chroma Radiance (20th of October 2025 version) is a 8.9 billion parameters model. As a base model, Chroma Radiance is intentionally designed to be an excellent starting point for finetuning. It provides a strong, neutral foundation for developers, researchers, and artists to create specialized models.",
6
+ "URLs": [
7
+ "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-chroma_radiance_201025_bf16.safetensors",
8
+ "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-chroma_radiance_201025_quanto_bf16_int8.safetensors"
9
+ ],
10
+ "image_outputs": true
11
+ },
12
+ "prompt": "draw a hat",
13
+ "resolution": "1280x720",
14
+ "guidance_scale": 3.0,
15
+ "num_inference_steps": 20,
16
+ "batch_size": 1
17
+ }
defaults/flux_dev_kontext.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Flux 1 Dev Kontext 12B",
4
+ "architecture": "flux_dev_kontext",
5
+ "description": "FLUX.1 Kontext is a 12 billion parameter rectified flow transformer capable of editing images based on instructions stored in the Prompt. Please be aware that Flux Kontext is picky on the resolution of the input image and the output dimensions may not match the dimensions of the input image.",
6
+ "URLs": [
7
+ "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1_kontext_dev_bf16.safetensors",
8
+ "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1_kontext_dev_quanto_bf16_int8.safetensors"
9
+ ]
10
+ },
11
+ "prompt": "add a hat",
12
+ "resolution": "1280x720",
13
+ "batch_size": 1
14
+ }
15
+
16
+
defaults/flux_dev_kontext_dreamomni2.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Flux 1 DreamOmni2 12B",
4
+ "architecture": "flux_dev_kontext_dreamomni2",
5
+ "description": "DreamOmni2 is a Multimodal Instruction-based Editing and Generation Model",
6
+ "URLs": [
7
+ "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1_kontext_dev_bf16.safetensors",
8
+ "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1_kontext_dev_quanto_bf16_int8.safetensors"
9
+ ],
10
+ "preload_URLs": [ "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux_dreamomni2_edit_lora.safetensors",
11
+ "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux_dreamomni2_gen_lora.safetensors"
12
+ ]
13
+ },
14
+ "prompt": "In the scene, the character from the first image stands on the left, and the character from the second image stands on the right. They are shaking hands against the backdrop of a spaceship interior.",
15
+ "resolution": "1280x720",
16
+ "batch_size": 1
17
+ }
18
+
19
+
defaults/flux_dev_umo.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Flux 1 UMO 12B",
4
+ "architecture": "flux_dev_umo",
5
+ "description": "FLUX.1 UMO Dev is a model that can Edit Images with a specialization in combining multiple image references (resized internally at 512x512 max) to produce an Image output. Best Image preservation at 768x768 Resolution Output.",
6
+ "URLs": "flux",
7
+ "loras": ["https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-dev-UMO_dit_lora_bf16.safetensors"],
8
+ "resolutions": [ ["1024x1024 (1:1)", "1024x1024"],
9
+ ["768x1024 (3:4)", "768x1024"],
10
+ ["1024x768 (4:3)", "1024x768"],
11
+ ["512x1024 (1:2)", "512x1024"],
12
+ ["1024x512 (2:1)", "1024x512"],
13
+ ["768x768 (1:1)", "768x768"],
14
+ ["768x512 (3:2)", "768x512"],
15
+ ["512x768 (2:3)", "512x768"]]
16
+ },
17
+ "prompt": "the man is wearing a hat",
18
+ "embedded_guidance_scale": 4,
19
+ "resolution": "768x768",
20
+ "batch_size": 1
21
+ }
22
+
23
+
defaults/flux_dev_uso.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Flux 1 USO Dev 12B",
4
+ "architecture": "flux_dev_uso",
5
+ "description": "FLUX.1 USO Dev is a model that can Edit Images with a specialization in Style Transfers (up to two).",
6
+ "modules": [ ["https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-dev-USO_projector_bf16.safetensors"]],
7
+ "URLs": "flux",
8
+ "loras": ["https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-dev-USO_dit_lora_bf16.safetensors"]
9
+ },
10
+ "prompt": "the man is wearing a hat",
11
+ "embedded_guidance_scale": 4,
12
+ "resolution": "1024x1024",
13
+ "batch_size": 1
14
+ }
15
+
16
+
defaults/flux_krea.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Flux 1 Dev Krea 12B",
4
+ "architecture": "flux",
5
+ "description": "Cutting-edge output quality, with a focus on aesthetic photography.",
6
+ "URLs": [
7
+ "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-krea-dev_bf16.safetensors",
8
+ "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-krea-dev_quanto_bf16_int8.safetensors"
9
+ ],
10
+ "image_outputs": true
11
+ },
12
+ "prompt": "draw a hat",
13
+ "resolution": "1280x720",
14
+ "batch_size": 1
15
+ }
defaults/flux_schnell.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Flux 1 Schnell 12B",
4
+ "architecture": "flux_schnell",
5
+ "description": "FLUX.1 Schnell is a 12 billion parameter rectified flow transformer capable of generating images from text descriptions. As a distilled model it requires fewer denoising steps.",
6
+ "URLs": [
7
+ "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-schnell_bf16.safetensors",
8
+ "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-schnell_quanto_bf16_int8.safetensors"
9
+ ],
10
+ "image_outputs": true
11
+ },
12
+ "prompt": "draw a hat",
13
+ "resolution": "1280x720",
14
+ "num_inference_steps": 10,
15
+ "batch_size": 1
16
+ }
defaults/flux_srpo.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Flux 1 Dev SRPO 12B",
4
+ "architecture": "flux",
5
+ "description": "By fine-tuning the FLUX.1.dev model with optimized denoising and online reward adjustment, SRPO improves its human-evaluated realism and aesthetic quality by over 3x.",
6
+ "URLs": [
7
+ "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-srpo-dev_bf16.safetensors",
8
+ "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-srpo-dev_quanto_bf16_int8.safetensors"
9
+ ]
10
+ },
11
+ "prompt": "draw a hat",
12
+ "resolution": "1024x1024",
13
+ "batch_size": 1
14
+ }
defaults/flux_srpo_uso.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Flux 1 USO SRPO 12B",
4
+ "architecture": "flux_dev_uso",
5
+ "description": "FLUX.1 USO SRPO is a model that can Edit Images with a specialization in Style Transfers (up to two). It leverages the improved Image quality brought by the SRPO process",
6
+ "modules": [ "flux_dev_uso"],
7
+ "URLs": "flux_srpo",
8
+ "loras": "flux_dev_uso"
9
+ },
10
+ "prompt": "the man is wearing a hat",
11
+ "embedded_guidance_scale": 4,
12
+ "resolution": "1024x1024",
13
+ "batch_size": 1
14
+ }
15
+
16
+
defaults/fun_inp.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model":
3
+ {
4
+ "name": "Fun InP image2video 14B",
5
+ "architecture" : "fun_inp",
6
+ "description": "The Fun model is an alternative image 2 video model that supports out of the box End Image fixing (contrary to the original Wan image 2 video model).",
7
+ "URLs": [
8
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_Fun_InP_14B_bf16.safetensors",
9
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_Fun_InP_14B_quanto_int8.safetensors",
10
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_Fun_InP_14B_quanto_fp16_int8.safetensors"
11
+ ]
12
+ }
13
+ }
defaults/fun_inp_1.3B.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model":
3
+ {
4
+ "name": "Fun InP image2video 1.3B",
5
+ "architecture" : "fun_inp_1.3B",
6
+ "description": "The Fun model is an alternative image 2 video model that supports out of the box End Image fixing (contrary to the original Wan image 2 video model). This version also adds image 2 video capability to the 1.3B model.",
7
+ "URLs": [
8
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_Fun_InP_1.3B_bf16.safetensors"
9
+ ]
10
+ }
11
+ }
defaults/heartmula_oss_3b.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "TTS HeartMuLa OSS 3B",
4
+ "architecture": "heartmula_oss_3b",
5
+ "description": "HeartMuLa open music generation conditioned on lyrics and tags.",
6
+ "URLs": [
7
+ "https://huggingface.co/DeepBeepMeep/TTS/resolve/main/heartmula_oss_3b_bf16.safetensors",
8
+ "https://huggingface.co/DeepBeepMeep/TTS/resolve/main/heartmula_oss_3b_quanto_bf16_int8.safetensors"
9
+ ]
10
+ },
11
+ "prompt": "[Verse]\nMorning light through the window pane\nI hum a tune to chase the rain\nSteady steps on a quiet street\nHeart and rhythm, gentle beat",
12
+ "alt_prompt": "piano,happy,wedding",
13
+ "temperature": 1.0
14
+ }
defaults/heartmula_rl_oss_3b_20260123.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "TTS HeartMuLa RL OSS (20260123) 3B",
4
+ "architecture": "heartmula_oss_3b",
5
+ "description": "HeartMuLa RL OSS 3B checkpoint (20260123) with updated codec support. This version should be better at following instructions thanks to a reinforced learning training.",
6
+ "URLs": [
7
+ "https://huggingface.co/DeepBeepMeep/TTS/resolve/main/heartmula_rl_oss_3b_20260123_bf16.safetensors",
8
+ "https://huggingface.co/DeepBeepMeep/TTS/resolve/main/heartmula_rl_oss_3b_20260123_quanto_bf16_int8.safetensors"
9
+ ],
10
+ "heartmula_codec_version": "20260123"
11
+ },
12
+ "prompt": "[Verse]\nMorning light through the window pane\nI hum a tune to chase the rain\nSteady steps on a quiet street\nHeart and rhythm, gentle beat",
13
+ "alt_prompt": "piano,happy,wedding",
14
+ "temperature": 1.0
15
+ }
defaults/hunyuan.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model":
3
+ {
4
+ "name": "Hunyuan Video Text2video 720p 13B",
5
+ "architecture" : "hunyuan",
6
+ "description": "Probably the best text 2 video model available.",
7
+ "URLs": [
8
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_720_bf16.safetensors",
9
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_720_quanto_int8.safetensors"
10
+ ]
11
+ }
12
+ }
defaults/hunyuan_1_5_480_i2v.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Hunyuan Video 1.5 Image2video 480p 8B",
4
+ "architecture": "hunyuan_1_5_i2v",
5
+ "description": "HunyuanVideo-1.5 is a video generation model that delivers top-tier quality with only 8.3B parameters, significantly lowering the barrier to usage. This is the Image2video 480p version.",
6
+ "URLs": [
7
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo1.5/resolve/main/hunyuan_video_1.5_i2v_480_bf16.safetensors",
8
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo1.5/resolve/main/hunyuan_video_1.5_i2v_480_quanto_bf16_int8.safetensors"
9
+ ]
10
+ },
11
+ "prompt" : "",
12
+ "resolution": "832x480",
13
+ "video_length": 97,
14
+ "num_inference_steps": 30,
15
+ "guidance_scale": 6.0,
16
+ "flow_shift": 5.0
17
+ }
defaults/hunyuan_1_5_480_i2v_step_distilled.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Hunyuan Video 1.5 Image2video 480p Step Distilled 8B",
4
+ "architecture": "hunyuan_1_5_i2v",
5
+ "description": "HunyuanVideo-1.5 is a video generation model that delivers top-tier quality with only 8.3B parameters, significantly lowering the barrier to usage. This is the official Step Distilled 480p version by the Hunyuan Team.",
6
+ "profiles_dir": [""],
7
+ "URLs": [
8
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo1.5/resolve/main/hunyuan_video_1.5_i2v_480_step_distilled_quanto_bf16_int8.safetensors",
9
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo1.5/resolve/main/hunyuan_video_1.5_i2v_480_step_distilled_bf16.safetensors"
10
+ ]
11
+ },
12
+ "prompt": "",
13
+ "resolution": "832x480",
14
+ "video_length": 97,
15
+ "num_inference_steps": 8,
16
+ "guidance_scale": 1.0,
17
+ "flow_shift": 5.0
18
+ }
defaults/hunyuan_1_5_480_t2v.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Hunyuan Video 1.5 Text2video 480p 8B",
4
+ "architecture": "hunyuan_1_5_t2v",
5
+ "description": "HunyuanVideo-1.5 is a video generation model that delivers top-tier quality with only 8.3B parameters, significantly lowering the barrier to usage. This is the Text2video 480p version.",
6
+ "URLs": [
7
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo1.5/resolve/main/hunyuan_video_1.5_480_bf16.safetensors",
8
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo1.5/resolve/main/hunyuan_video_1.5_480_quanto_bf16_int8.safetensors"
9
+ ]
10
+ },
11
+ "resolution": "832x480",
12
+ "video_length": 97,
13
+ "num_inference_steps": 30,
14
+ "guidance_scale": 6.0,
15
+ "flow_shift": 5.0
16
+ }
defaults/hunyuan_1_5_480_t2v_lightx2v.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Hunyuan Video 1.5 Text2video 480p Lightx2v 8B",
4
+ "architecture": "hunyuan_1_5_t2v",
5
+ "description": "HunyuanVideo-1.5 is a video generation model that delivers top-tier quality with only 8.3B parameters, significantly lowering the barrier to usage. This is the Text2video 480p version accelerated by lightx2v.",
6
+ "profiles_dir": [""],
7
+ "URLs": [
8
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo1.5/resolve/main/hy1.5_t2v_480p_lightx2v_4step_bf16.safetensors",
9
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo1.5/resolve/main/hy1.5_t2v_480p_lightx2v_4step_quanto_int8_bf16.safetensors"
10
+ ]
11
+ },
12
+ "resolution": "832x480",
13
+ "video_length": 97,
14
+ "num_inference_steps": 4,
15
+ "guidance_scale": 1.0,
16
+ "flow_shift": 8.0
17
+ }
defaults/hunyuan_1_5_i2v.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Hunyuan Video 1.5 Image2video 720p 8B",
4
+ "architecture": "hunyuan_1_5_i2v",
5
+ "description": "HunyuanVideo-1.5 is a video generation model that delivers top-tier quality with only 8.3B parameters, significantly lowering the barrier to usage. This is the Image2video 720p version.",
6
+ "URLs": [
7
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo1.5/resolve/main/hunyuan_video_1.5_i2v_720_bf16.safetensors",
8
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo1.5/resolve/main/hunyuan_video_1.5_i2v_720_quanto_bf16_int8.safetensors"
9
+ ]
10
+ },
11
+ "prompt" : "",
12
+ "resolution": "1280x720",
13
+ "video_length": 97,
14
+ "num_inference_steps": 30,
15
+ "guidance_scale": 6.0,
16
+ "flow_shift": 7.0
17
+ }
defaults/hunyuan_1_5_t2v.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Hunyuan Video 1.5 Text2video 720p 8B",
4
+ "architecture": "hunyuan_1_5_t2v",
5
+ "description": "HunyuanVideo-1.5 is a video generation model that delivers top-tier quality with only 8.3B parameters, significantly lowering the barrier to usage. This is the Text2video 720p version.",
6
+ "URLs": [
7
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo1.5/resolve/main/hunyuan_video_1.5_720_bf16.safetensors",
8
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo1.5/resolve/main/hunyuan_video_1.5_720_quanto_bf16_int8.safetensors"
9
+ ]
10
+ },
11
+ "resolution": "1280x720",
12
+ "video_length": 97,
13
+ "num_inference_steps": 30,
14
+ "guidance_scale": 6.0,
15
+ "flow_shift": 9.0
16
+ }
defaults/hunyuan_1_5_upsampler.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Hunyuan Video 1.5 Upsampler 720p 8B",
4
+ "architecture": "hunyuan_1_5_upsampler",
5
+ "description": "HunyuanVideo-1.5 is a video generation model that delivers top-tier quality with only 8.3B parameters, significantly lowering the barrier to usage. This version is a specialized 720p upsampler.",
6
+ "URLs": [
7
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo1.5/resolve/main/hunyuan_video_1.5_upsampler_720_bf16.safetensors",
8
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo1.5/resolve/main/hunyuan_video_1.5_upsampler_720_quanto_bf16_int8.safetensors"
9
+ ],
10
+ "upsampler": "720",
11
+ "preload_URLs": [
12
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo1.5/resolve/main/hunyuan_video_1.5_upsampler_720p_sr_distilled.safetensors",
13
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo1.5/resolve/main/hunyuan_video_1.5_upsampler_720p_sr_distilled_config.json"
14
+ ]
15
+
16
+ },
17
+ "resolution": "1280x720",
18
+ "video_length": 97,
19
+ "num_inference_steps": 6,
20
+ "guidance_scale": 1,
21
+ "flow_shift": 2.0
22
+ }
defaults/hunyuan_1_5_upsampler_1080.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Hunyuan Video 1.5 Upsampler 1080p 8B",
4
+ "architecture": "hunyuan_1_5_upsampler",
5
+ "description": "HunyuanVideo-1.5 is a video generation model that delivers top-tier quality with only 8.3B parameters, significantly lowering the barrier to usage. This version is a specialized 1080p upsampler.",
6
+ "upsampler": "1080",
7
+ "URLs": [
8
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo1.5/resolve/main/hunyuan_video_1.5_upsampler_1080_bf16.safetensors",
9
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo1.5/resolve/main/hunyuan_video_1.5_upsampler_1080_quanto_bf16_int8.safetensors"
10
+ ],
11
+ "preload_URLs": [
12
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo1.5/resolve/main/hunyuan_video_1.5_upsampler_1080p_sr_distilled.safetensors",
13
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo1.5/resolve/main/hunyuan_video_1.5_upsampler_1080p_sr_distilled_config.json"
14
+ ]
15
+
16
+ },
17
+ "resolution": "1920x1088",
18
+ "video_length": 97,
19
+ "num_inference_steps": 8,
20
+ "guidance_scale": 1,
21
+ "flow_shift": 2.0
22
+ }
defaults/hunyuan_avatar.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model":
3
+ {
4
+ "name": "Hunyuan Video Avatar 720p 13B",
5
+ "architecture" : "hunyuan_avatar",
6
+ "description": "With the Hunyuan Video Avatar model you can animate a person based on the content of an audio input. Please note that the video generator works by processing 128 frames segment at a time (even if you ask less). The good news is that it will concatenate multiple segments for long video generation (max 3 segments recommended as the quality will get worse).",
7
+ "URLs": [
8
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_avatar_720_bf16.safetensors",
9
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_avatar_720_quanto_bf16_int8.safetensors"
10
+ ]
11
+ }
12
+ }
defaults/hunyuan_custom.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model":
3
+ {
4
+ "name": "Hunyuan Video Custom 720p 13B",
5
+ "architecture" : "hunyuan_custom",
6
+ "description": "The Hunyuan Video Custom model is probably the best model to transfer people (only people for the moment) as it is quite good to keep their identity. However it is slow as to get good results, you need to generate 720p videos with 30 steps.",
7
+ "URLs": [
8
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_custom_720_bf16.safetensors",
9
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_custom_720_quanto_bf16_int8.safetensors"
10
+ ]
11
+ }
12
+ }
defaults/hunyuan_custom_audio.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model":
3
+ {
4
+ "name": "Hunyuan Video Custom Audio 720p 13B",
5
+ "architecture" : "hunyuan_custom_audio",
6
+ "description": "The Hunyuan Video Custom Audio model can be used to generate scenes of a person speaking given a Reference Image and a Recorded Voice or Song. The reference image is not a start image and therefore one can represent the person in a different context.The video length can be anything up to 10s. It is also quite good to generate no sound Video based on a person.",
7
+ "URLs": [
8
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_custom_audio_720_bf16.safetensors",
9
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_custom_audio_720_quanto_bf16_int8.safetensors"
10
+ ]
11
+ }
12
+ }
defaults/hunyuan_custom_edit.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model":
3
+ {
4
+ "name": "Hunyuan Video Custom Edit 720p 13B",
5
+ "architecture" : "hunyuan_custom_edit",
6
+ "description": "The Hunyuan Video Custom Edit model can be used to do Video inpainting on a person (add accessories or completely replace the person). You will need in any case to define a Video Mask which will indicate which area of the Video should be edited.",
7
+ "URLs": [
8
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_custom_edit_720_bf16.safetensors",
9
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_custom_edit_720_quanto_bf16_int8.safetensors"
10
+ ]
11
+ }
12
+ }
defaults/hunyuan_i2v.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model":
3
+ {
4
+ "name": "Hunyuan Video Image2video 720p 13B",
5
+ "architecture" : "hunyuan_i2v",
6
+ "description": "A good looking image 2 video model, but not so good in prompt adherence.",
7
+ "URLs": [
8
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_i2v_720_bf16v2.safetensors",
9
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_i2v_720_quanto_int8v2.safetensors"
10
+ ]
11
+ }
12
+ }