vidfom commited on
Commit
618f472
·
verified ·
1 Parent(s): d1e67e8

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitignore +47 -0
  2. Custom Resolutions Instructions.txt +16 -0
  3. Dockerfile +92 -0
  4. LICENSE.txt +46 -0
  5. README.md +256 -0
  6. defaults/ReadMe.txt +13 -0
  7. defaults/alpha.json +19 -0
  8. defaults/alpha_sf.json +17 -0
  9. defaults/animate.json +17 -0
  10. defaults/chatterbox.json +18 -0
  11. defaults/fantasy.json +11 -0
  12. defaults/flf2v_720p.json +16 -0
  13. defaults/flux.json +15 -0
  14. defaults/flux_chroma.json +17 -0
  15. defaults/flux_dev_kontext.json +16 -0
  16. defaults/flux_dev_kontext_dreamomni2.json +19 -0
  17. defaults/flux_dev_umo.json +23 -0
  18. defaults/flux_dev_uso.json +16 -0
  19. defaults/flux_krea.json +15 -0
  20. defaults/flux_schnell.json +16 -0
  21. defaults/flux_srpo.json +14 -0
  22. defaults/flux_srpo_uso.json +16 -0
  23. defaults/fun_inp.json +13 -0
  24. defaults/fun_inp_1.3B.json +11 -0
  25. defaults/hunyuan.json +12 -0
  26. defaults/hunyuan_avatar.json +12 -0
  27. defaults/hunyuan_custom.json +12 -0
  28. defaults/hunyuan_custom_audio.json +12 -0
  29. defaults/hunyuan_custom_edit.json +12 -0
  30. defaults/hunyuan_i2v.json +12 -0
  31. defaults/hunyuan_t2v_accvideo.json +30 -0
  32. defaults/hunyuan_t2v_fast.json +32 -0
  33. defaults/i2v.json +13 -0
  34. defaults/i2v_2_2.json +25 -0
  35. defaults/i2v_2_2_multitalk.json +18 -0
  36. defaults/i2v_720p.json +14 -0
  37. defaults/i2v_fusionix.json +11 -0
  38. defaults/infinitetalk.json +16 -0
  39. defaults/infinitetalk_multi.json +16 -0
  40. defaults/ltxv_13B.json +19 -0
  41. defaults/ltxv_distilled.json +15 -0
  42. defaults/lucy_edit.json +20 -0
  43. defaults/lucy_edit_fastwan.json +17 -0
  44. defaults/lynx.json +18 -0
  45. defaults/moviigen.json +16 -0
  46. defaults/multitalk.json +15 -0
  47. defaults/multitalk_720p.json +13 -0
  48. defaults/ovi.json +18 -0
  49. defaults/ovi_fastwan.json +17 -0
  50. defaults/phantom_1.3B.json +11 -0
.gitignore ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .*
2
+ *.py[cod]
3
+ # *.jpg
4
+ *.jpeg
5
+ # *.png
6
+ *.gif
7
+ *.bmp
8
+ *.mp4
9
+ *.mov
10
+ *.mkv
11
+ *.log
12
+ *.zip
13
+ *.pt
14
+ *.pth
15
+ *.ckpt
16
+ *.safetensors
17
+ #*.json
18
+ # *.txt
19
+ *.backup
20
+ *.pkl
21
+ *.html
22
+ *.pdf
23
+ *.whl
24
+ *.exe
25
+ cache
26
+ __pycache__/
27
+ storage/
28
+ samples/
29
+ !.gitignore
30
+ !requirements.txt
31
+ .DS_Store
32
+ *DS_Store
33
+ google/
34
+ Wan2.1-T2V-14B/
35
+ Wan2.1-T2V-1.3B/
36
+ Wan2.1-I2V-14B-480P/
37
+ Wan2.1-I2V-14B-720P/
38
+ outputs/
39
+ outputs2/
40
+ gradio_outputs/
41
+ ckpts/
42
+ loras/
43
+ loras_i2v/
44
+
45
+ settings/
46
+
47
+ wgp_config.json
Custom Resolutions Instructions.txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ You can override the choice of Resolutions offered by WanGP, if you create a file "resolutions.json" in the main WanGP folder.
2
+ This file is composed of a list of 2 elements sublists. Each 2 elements sublist should have the format ["Label", "WxH"] where W, H are respectively the Width and Height of the resolution. Please make sure that W and H are multiples of 16. The letter "x" should be placed in between these two dimensions.
3
+
4
+ Here is below a sample "resolutions.json" file :
5
+
6
+ [
7
+ ["1280x720 (16:9, 720p)", "1280x720"],
8
+ ["720x1280 (9:16, 720p)", "720x1280"],
9
+ ["1024x1024 (1:1, 720p)", "1024x1024"],
10
+ ["1280x544 (21:9, 720p)", "1280x544"],
11
+ ["544x1280 (9:21, 720p)", "544x1280"],
12
+ ["1104x832 (4:3, 720p)", "1104x832"],
13
+ ["832x1104 (3:4, 720p)", "832x1104"],
14
+ ["960x960 (1:1, 720p)", "960x960"],
15
+ ["832x480 (16:9, 480p)", "832x480"]
16
+ ]
Dockerfile ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04
2
+
3
+ # Build arg for GPU architectures - specify which CUDA compute capabilities to compile for
4
+ # Common values:
5
+ # 7.0 - Tesla V100
6
+ # 7.5 - RTX 2060, 2070, 2080, Titan RTX
7
+ # 8.0 - A100, A800 (Ampere data center)
8
+ # 8.6 - RTX 3060, 3070, 3080, 3090 (Ampere consumer)
9
+ # 8.9 - RTX 4070, 4080, 4090 (Ada Lovelace)
10
+ # 9.0 - H100, H800 (Hopper data center)
11
+ # 12.0 - RTX 5070, 5080, 5090 (Blackwell) - Note: sm_120 architecture
12
+ #
13
+ # Examples:
14
+ # RTX 3060: --build-arg CUDA_ARCHITECTURES="8.6"
15
+ # RTX 4090: --build-arg CUDA_ARCHITECTURES="8.9"
16
+ # Multiple: --build-arg CUDA_ARCHITECTURES="8.0;8.6;8.9"
17
+ #
18
+ # Note: Including 8.9 or 9.0 may cause compilation issues on some setups
19
+ # Default includes 8.0 and 8.6 for broad Ampere compatibility
20
+ ARG CUDA_ARCHITECTURES="8.0;8.6"
21
+
22
+ ENV DEBIAN_FRONTEND=noninteractive
23
+
24
+ # Install system dependencies
25
+ RUN apt update && \
26
+ apt install -y \
27
+ python3 python3-pip git wget curl cmake ninja-build \
28
+ libgl1 libglib2.0-0 ffmpeg && \
29
+ apt clean
30
+
31
+ WORKDIR /workspace
32
+
33
+ COPY requirements.txt .
34
+
35
+ # Upgrade pip first
36
+ RUN pip install --upgrade pip setuptools wheel
37
+
38
+ # Install requirements if exists
39
+ RUN pip install -r requirements.txt
40
+
41
+ # Install PyTorch with CUDA support
42
+ RUN pip install --extra-index-url https://download.pytorch.org/whl/cu124 \
43
+ torch==2.6.0+cu124 torchvision==0.21.0+cu124
44
+
45
+ # Install SageAttention from git (patch GPU detection)
46
+ ENV TORCH_CUDA_ARCH_LIST="${CUDA_ARCHITECTURES}"
47
+ ENV FORCE_CUDA="1"
48
+ ENV MAX_JOBS="1"
49
+
50
+ COPY <<EOF /tmp/patch_setup.py
51
+ import os
52
+ with open('setup.py', 'r') as f:
53
+ content = f.read()
54
+
55
+ # Get architectures from environment variable
56
+ arch_list = os.environ.get('TORCH_CUDA_ARCH_LIST')
57
+ arch_set = '{' + ', '.join([f'"{arch}"' for arch in arch_list.split(';')]) + '}'
58
+
59
+ # Replace the GPU detection section
60
+ old_section = '''compute_capabilities = set()
61
+ device_count = torch.cuda.device_count()
62
+ for i in range(device_count):
63
+ major, minor = torch.cuda.get_device_capability(i)
64
+ if major < 8:
65
+ warnings.warn(f"skipping GPU {i} with compute capability {major}.{minor}")
66
+ continue
67
+ compute_capabilities.add(f"{major}.{minor}")'''
68
+
69
+ new_section = 'compute_capabilities = ' + arch_set + '''
70
+ print(f"Manually set compute capabilities: {compute_capabilities}")'''
71
+
72
+ content = content.replace(old_section, new_section)
73
+
74
+ with open('setup.py', 'w') as f:
75
+ f.write(content)
76
+ EOF
77
+
78
+ RUN git clone https://github.com/thu-ml/SageAttention.git /tmp/sageattention && \
79
+ cd /tmp/sageattention && \
80
+ python3 /tmp/patch_setup.py && \
81
+ pip install --no-build-isolation .
82
+
83
+ RUN useradd -u 1000 -ms /bin/bash user
84
+
85
+ RUN chown -R user:user /workspace
86
+
87
+ RUN mkdir /home/user/.cache && \
88
+ chown -R user:user /home/user/.cache
89
+
90
+ COPY entrypoint.sh /workspace/entrypoint.sh
91
+
92
+ ENTRYPOINT ["/workspace/entrypoint.sh"]
LICENSE.txt ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ WanGP NON-COMMERCIAL EVALUATION LICENSE 1.0
2
+
3
+ Definitions
4
+ 1.1 “Software” means the source code, binaries, libraries, utilities and UI released under this license.
5
+ 1.2 “Output” means images, videos or other media produced by running the Software.
6
+ 1.3 “Commercial Use” means:
7
+ a) selling, sublicensing, renting, leasing, or otherwise distributing the Software, in whole or in part, for a fee or other consideration; or
8
+ b) offering the Software (or any derivative) as part of a paid product or hosted service; or
9
+ c) using the Software (or any derivative) to provide cloud-based or backend services, where end users access or pay for those services.
10
+
11
+ License Grant
12
+ Subject to Section 3:
13
+ a) You are granted a worldwide, non-exclusive, royalty-free, revocable license to use, reproduce, modify and distribute the Software for non-commercial purposes only.
14
+ b) You are granted a worldwide, non-exclusive, royalty-free, irrevocable license to use, reproduce, modify and distribute the Output for any purpose, including commercial sale, provided that any commercial distribution of the Output includes a clear notice that the Output was produced (in whole or in part) using WanGP, along with a hyperlink to the WanGP application’s About tab or repository.
15
+
16
+ Restrictions
17
+ 3.1 You MAY NOT distribute, sublicense or otherwise make available the Software (or any derivative) for Commercial Use.
18
+ 3.2 You MAY sell, license or otherwise commercially exploit the Output without restriction.
19
+ 3.3 If you wish to use the Software for Commercial Use, you must obtain a separate commercial license from the Licensor.
20
+
21
+ Third-Party Components 4.1 The Software includes components licensed under various open-source licenses (e.g., Apache 2.0, MIT, BSD). 4.2 You must comply with all applicable terms of those third-party licenses, including preservation of copyright notices, inclusion of required license texts, and patent-grant provisions. 4.3 You can find the full text of each third-party license via the “About” tab in the WanGP application, which provides links to their original GitHub repositories.
22
+
23
+ Attribution
24
+ 5.1 You must give appropriate credit by including:
25
+ • a copy of this license (or a link to it), and
26
+ • a notice that your use is based on “WanGP”.
27
+ 5.2 You may do so in any reasonable manner, but not in any way that suggests the Licensor endorses you or your use.
28
+
29
+ Disclaimer of Warranty & Liability
30
+ THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED.
31
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE.
32
+
33
+ Commercial Licensing The Licensor may offer commercial licenses for the Software, which grant rights to use the Software for Commercial Use. Please contact [deepbeepmeep@yahoo.com] for terms and pricing.
34
+
35
+ Effective Date & Previous Versions
36
+ 8.1 This license is effective as of the date the LICENSE file is updated in the WanGP repository.
37
+ 8.2 Any copies of the Software obtained under prior license terms before this Effective Date remain governed by those prior terms; such granted rights are irrevocable.
38
+ 8.3 Use of the Software after the release of any subsequent version by the Licensor is subject to the terms of the then-current license, unless a separate agreement is in place.
39
+
40
+ Acceptable Use / Moral Clause
41
+ 9.1 You MAY NOT use the Software or the Output to facilitate or produce content that is illegal, harmful, violent, harassing, defamatory, fraudulent, or otherwise violates applicable laws or fundamental human rights.
42
+ 9.2 You MAY NOT deploy the Software or Output in contexts that promote hate speech, extremist ideology, human rights abuses, or other actions that could foreseeably cause significant harm to individuals or groups.
43
+ 9.3 The Licensor reserves the right to terminate the rights granted under this license if a licensee materially breaches this Acceptable Use clause.
44
+
45
+ END OF LICENSE
46
+
README.md ADDED
@@ -0,0 +1,256 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # WanGP
2
+
3
+ -----
4
+ <p align="center">
5
+ <b>WanGP by DeepBeepMeep : The best Open Source Video Generative Models Accessible to the GPU Poor</b>
6
+ </p>
7
+
8
+ WanGP supports the Wan (and derived models), Hunyuan Video and LTV Video models with:
9
+ - Low VRAM requirements (as low as 6 GB of VRAM is sufficient for certain models)
10
+ - Support for old Nvidia GPUs (RTX 10XX, 20xx, ...)
11
+ - Support for AMD GPUs Radeon RX 76XX, 77XX, 78XX & 79XX, instructions in the Installation Section Below.
12
+ - Very Fast on the latest GPUs
13
+ - Easy to use Full Web based interface
14
+ - Auto download of the required model adapted to your specific architecture
15
+ - Tools integrated to facilitate Video Generation : Mask Editor, Prompt Enhancer, Temporal and Spatial Generation, MMAudio, Video Browser, Pose / Depth / Flow extractor
16
+ - Loras Support to customize each model
17
+ - Queuing system : make your shopping list of videos to generate and come back later
18
+
19
+ **Discord Server to get Help from Other Users and show your Best Videos:** https://discord.gg/g7efUW9jGV
20
+
21
+ **Follow DeepBeepMeep on Twitter/X to get the Latest News**: https://x.com/deepbeepmeep
22
+
23
+ -----
24
+
25
+ ### You have your choice of Dark or Light Theme
26
+
27
+
28
+ <img width="1895" height="1023" alt="Screenshot 2025-10-23 210313" src="https://github.com/user-attachments/assets/3778ae4e-6a95-4752-ba47-bb160c653310" />
29
+
30
+ -----
31
+ <img width="1899" height="1020" alt="Screenshot 2025-10-23 210500" src="https://github.com/user-attachments/assets/5e524260-ad24-4203-acf2-6622676a83bb" />
32
+
33
+ -----
34
+ ![Screen Recording 2025-10-23 210625 - frame at 0m9s](https://github.com/user-attachments/assets/c65a815e-09fa-41a7-bc49-5f879b0b8ece)
35
+
36
+ -----
37
+
38
+ ## 🔥 Latest Updates :
39
+ ### October 26 2025: WanGP v9.21, Why isn't all my VRAM used ?
40
+
41
+
42
+ *WanGP exclusive*: VRAM requirements have never been that low !
43
+
44
+ **Wan 2.2 Ovi 10 GB** for all the GPU Poors of the World: *only 6 GB of VRAM to generate 121 frames at 720p*. With 16 GB of VRAM, you may even be able to load all the model in VRAM with *Memory Profile 3*
45
+
46
+ To get the x10 speed effect just apply the FastWan Lora Accelerator that comes prepackaged with Ovi (accessible in the dropdown box Settings at the top)
47
+
48
+ *update v9.21*: Got FastWan to work with Ovi: it is now 10 times faster ! (not including the VAE)
49
+
50
+ ### October 24 2025: WanGP v9.10, What else will you ever need after this one ?
51
+
52
+ With WanGP v9 you will have enough features to go to a desert island with no internet connection and come back with a full Hollywood movie.
53
+
54
+ First here are the new models supported:
55
+ - **Wan 2.1 Alpha** : a very requested model that can generate videos with *semi transparent background* (as it is very lora picky it supports only the *Self Forcing / lightning* loras accelerators)
56
+ - **Chatterbox Multilingual**: the first *Voice Generator* in WanGP. Let's say you have a flu and lost your voice (somehow I can't think of another usecase), the world will still be able to hear you as *Chatterbox* can generate up to 15s clips of your voice using a recorded voice sample. Chatterbox works with numerous languages out the box.
57
+ - **Flux DreamOmni2** : another wannabe *Nano Banana* image Editor / image composer. The *Edit Mode* ("Conditional Image is first Main Subject ...") seems to work better than the *Gen Mode* (Conditional Images are People / Objects ..."). If you have at least 16 GB of VRAM it is recommended to force profile 3 for this model (it uses an autoregressive model for the prompt encoding and the start may be slow).
58
+ - **Ditto** (new with *WanGP 9.1* !): a powerful Video 2 Video model, can change for instance the style or the material visible in the video. Be aware it is an instruct based model, so the prompt should contain instructions.
59
+
60
+ Upgraded Features:
61
+ - A new **Audio Gallery** to store your Chatterbox generations and import your audio assets. *Metadata support* (stored gen settings) for *Wav files* generated with WanGP available from day one.
62
+ - **Matanyone** improvements: you can now use it during a video gen, it will *suspend gracefully the Gen in progress*. *Input Video / Images* can be resized for faster processing & lower VRAM. Image version can now generate *Green screens* (not used by WanGP but I did it because someone asked for it and I am nice) and *Alpha masks*.
63
+ - **Images Stored in Metadata**: Video Gen *Settings Metadata* that are stored in the Generated Videos can now contain the Start Image, Image Refs used to generate the Video. Many thanks to **Gunther-Schulz** for this contribution
64
+ - **Three Levels of Hierarchy** to browse the models / finetunes: you can collect as many finetunes as you want now and they will no longer encumber the UI.
65
+ - Added **Loras Accelerators** for *Wan 2.1 1.3B*, *Wan 2.2 i2v*, *Flux* and the latest *Wan 2.2 Lightning*
66
+ - Finetunes now support **Custom Text Encoders** : you will need to use the "text_encoder_URLs" key. Please check the finetunes doc.
67
+ - Sometime Less is More: removed the palingenesis finetunes that were controversial
68
+
69
+ Huge Kudos & Thanks to **Tophness** that has outdone himself with these Great Features:
70
+ - **Multicolors Queue** items with **Drag & Drop** to reorder them
71
+ - **Edit a Gen Request** that is already in the queue
72
+ - Added **Plugin support** to WanGP : found that features are missing in WanGP, you can now add tabs at the top in WanGP. Each tab may contain a full embedded App that can share data with the Video Generator of WanGP. Please check the Plugin guide written by Tophness and don't hesitate to contact him or me on the Discord if you have a plugin you want to share. I have added a new Plugins channels to discuss idea of plugins and help each other developing plugins. *Idea for a PlugIn that may end up popular*: a screen where you view the hard drive space used per model and that will let you remove unused models weights
73
+ - Two Plugins ready to use designed & developped by **Tophness**: an **Extended Gallery** and a **Lora multipliers Wizard**
74
+
75
+ WanGP v9 is now targetting Pytorch 2.8 although it should still work with 2.7, don't forget to upgrade by doing:
76
+ ```bash
77
+ pip install torch==2.8.0 torchvision torchaudio --index-url https://download.pytorch.org/whl/test/cu128
78
+ ```
79
+ You will need to upgrade Sage Attention or Flash (check the installation guide)
80
+
81
+ *Update info: you might have some git error message while upgrading to v9 if WanGP is already installed.*
82
+ Sorry about that if that's the case, you will need to reinstall WanGP.
83
+ There are two different ways to fix this issue while still preserving your data:
84
+ 1) **Command Line**
85
+ If you have access to a terminal window :
86
+ ```
87
+ cd installation_path_of_wangp
88
+ git fetch origin && git reset --hard origin/main
89
+ pip install -r requirements.txt
90
+ ```
91
+
92
+ 2) **Generic Method**
93
+ a) move outside the installation WanGP folder the folders **ckpts**, **settings**, **outputs** and all the **loras** folders and the file **wgp_config.json**
94
+ b) delete the WanGP folder and reinstall
95
+ c) move back what you moved in a)
96
+
97
+ ## 🔥 Latest Updates :
98
+ ### October 6 2025: WanGP v8.999 - A few last things before the Big Unknown ...
99
+
100
+ This new version hasn't any new model...
101
+
102
+ ...but temptation to upgrade will be high as it contains a few Loras related features that may change your Life:
103
+ - **Ready to use Loras Accelerators Profiles** per type of model that you can apply on your current *Generation Settings*. Next time I will recommend a *Lora Accelerator*, it will be only one click away. And best of all of the required Loras will be downloaded automatically. When you apply an *Accelerator Profile*, input fields like the *Number of Denoising Steps* *Activated Loras*, *Loras Multipliers* (such as "1;0 0;1" ...) will be automatically filled. However your video specific fields will be preserved, so it will be easy to switch between Profiles to experiment. With *WanGP 8.993*, the *Accelerator Loras* are now merged with *Non Accelerator Loras". Things are getting too easy...
104
+
105
+ - **Embedded Loras URL** : WanGP will now try to remember every Lora URLs it sees. For instance if someone sends you some settings that contain Loras URLs or you extract the Settings of Video generated by a friend with Loras URLs, these URLs will be automatically added to *WanGP URL Cache*. Conversely everything you will share (Videos, Settings, Lset files) will contain the download URLs if they are known. You can also download directly a Lora in WanGP by using the *Download Lora* button at the bottom. The Lora will be immediately available and added to WanGP lora URL cache. This will work with *Hugging Face* as a repository. Support for CivitAi will come as soon as someone is nice enough to post a GitHub PR ...
106
+
107
+ - **.lset file** supports embedded Loras URLs. It has never been easier to share a Lora with a friend. As a reminder a .lset file can be created directly from *WanGP Web Interface* and it contains a list of Loras and their multipliers, a Prompt and Instructions how to use these loras (like the Lora's *Trigger*). So with embedded Loras URL, you can send an .lset file by email or share it on discord: it is just a 1 KB tiny text, but with it other people will be able to use Gigabytes Loras as these will be automatically downloaded.
108
+
109
+ I have created the new Discord Channel **share-your-settings** where you can post your *Settings* or *Lset files*. I will be pleased to add new Loras Accelerators in the list of WanGP *Accelerators Profiles if you post some good ones there.
110
+
111
+ *With the 8.993 update*, I have added support for **Scaled FP8 format**. As a sample case, I have created finetunes for the **Wan 2.2 PalinGenesis** Finetune which is quite popular recently. You will find it in 3 flavors : *t2v*, *i2v* and *Lightning Accelerated for t2v*.
112
+
113
+ The *Scaled FP8 format* is widely used as it is the format used by ... *ComfyUI*. So I expect a flood of Finetunes in the *share-your-finetune* channel. If not it means this feature was useless and I will remove it &#x1F608;&#x1F608;&#x1F608;
114
+
115
+ Not enough Space left on your SSD to download more models ? Would like to reuse Scaled FP8 files in your ComfyUI Folder without duplicating them ? Here comes *WanGP 8.994* **Multiple Checkpoints Folders** : you just need to move the files into different folders / hard drives or reuse existing folders and let WanGP know about it in the *Config Tab* and WanGP will be able to put all the parts together.
116
+
117
+ Last but not least the Lora's documentation has been updated.
118
+
119
+ *update 8.991*: full power of *Vace Lynx* unleashed with new combinations such as Landscape + Face / Clothes + Face / Injected Frame (Start/End frames/...) + Face
120
+ *update 8.992*: optimized gen with Lora, should be 10% faster if many loras
121
+ *update 8.993*: Support for *Scaled FP8* format and samples *Paligenesis* finetunes, merged Loras Accelerators and Non Accelerators
122
+ *update 8.994*: Added custom checkpoints folders
123
+ *update 8.999*: fixed a lora + fp8 bug and version sync for the jump to the unknown
124
+
125
+ ### September 30 2025: WanGP v8.9 - Combinatorics
126
+
127
+ This new version of WanGP introduces **Wan 2.1 Lynx** the best Control Net so far to transfer *Facial Identity*. You will be amazed to recognize your friends even with a completely different hair style. Congrats to the *Byte Dance team* for this achievement. Lynx works quite well with *Fusionix t2v* 10 steps.
128
+
129
+ *WanGP 8.9* also illustrates how existing WanGP features can be easily combined with new models. For instance with *Lynx* you will get out of the box *Video to Video* and *Image/Text to Image*.
130
+
131
+ Another fun combination is *Vace* + *Lynx*, which works much better than *Vace StandIn*. I have added sliders to change the weight of Vace & Lynx to allow you to tune the effects.
132
+
133
+
134
+
135
+ See full changelog: **[Changelog](docs/CHANGELOG.md)**
136
+
137
+ ## 📋 Table of Contents
138
+
139
+ - [🚀 Quick Start](#-quick-start)
140
+ - [📦 Installation](#-installation)
141
+ - [🎯 Usage](#-usage)
142
+ - [📚 Documentation](#-documentation)
143
+ - [🔗 Related Projects](#-related-projects)
144
+
145
+ ## 🚀 Quick Start
146
+
147
+ **One-click installation:**
148
+ - Get started instantly with [Pinokio App](https://pinokio.computer/)
149
+ - Use Redtash1 [One Click Install with Sage](https://github.com/Redtash1/Wan2GP-Windows-One-Click-Install-With-Sage)
150
+
151
+ **Manual installation:**
152
+ ```bash
153
+ git clone https://github.com/deepbeepmeep/Wan2GP.git
154
+ cd Wan2GP
155
+ conda create -n wan2gp python=3.10.9
156
+ conda activate wan2gp
157
+ pip install torch==2.8.0 torchvision torchaudio --index-url https://download.pytorch.org/whl/test/cu128
158
+ pip install -r requirements.txt
159
+ ```
160
+
161
+ **Run the application:**
162
+ ```bash
163
+ python wgp.py
164
+ ```
165
+
166
+ **Update the application:**
167
+ If using Pinokio use Pinokio to update otherwise:
168
+ Get in the directory where WanGP is installed and:
169
+ ```bash
170
+ git pull
171
+ conda activate wan2gp
172
+ pip install -r requirements.txt
173
+ ```
174
+
175
+ if you get some error messages related to git, you may try the following (beware this will overwrite local changes made to the source code of WanGP):
176
+ ```bash
177
+ git fetch origin && git reset --hard origin/main
178
+ conda activate wan2gp
179
+ pip install -r requirements.txt
180
+ ```
181
+
182
+ ## 🐳 Docker:
183
+
184
+ **For Debian-based systems (Ubuntu, Debian, etc.):**
185
+
186
+ ```bash
187
+ ./run-docker-cuda-deb.sh
188
+ ```
189
+
190
+ This automated script will:
191
+
192
+ - Detect your GPU model and VRAM automatically
193
+ - Select optimal CUDA architecture for your GPU
194
+ - Install NVIDIA Docker runtime if needed
195
+ - Build a Docker image with all dependencies
196
+ - Run WanGP with optimal settings for your hardware
197
+
198
+ **Docker environment includes:**
199
+
200
+ - NVIDIA CUDA 12.4.1 with cuDNN support
201
+ - PyTorch 2.6.0 with CUDA 12.4 support
202
+ - SageAttention compiled for your specific GPU architecture
203
+ - Optimized environment variables for performance (TF32, threading, etc.)
204
+ - Automatic cache directory mounting for faster subsequent runs
205
+ - Current directory mounted in container - all downloaded models, loras, generated videos and files are saved locally
206
+
207
+ **Supported GPUs:** RTX 40XX, RTX 30XX, RTX 20XX, GTX 16XX, GTX 10XX, Tesla V100, A100, H100, and more.
208
+
209
+ ## 📦 Installation
210
+
211
+ ### Nvidia
212
+ For detailed installation instructions for different GPU generations:
213
+ - **[Installation Guide](docs/INSTALLATION.md)** - Complete setup instructions for RTX 10XX to RTX 50XX
214
+
215
+ ### AMD
216
+ For detailed installation instructions for different GPU generations:
217
+ - **[Installation Guide](docs/AMD-INSTALLATION.md)** - Complete setup instructions for Radeon RX 76XX, 77XX, 78XX & 79XX
218
+
219
+ ## 🎯 Usage
220
+
221
+ ### Basic Usage
222
+ - **[Getting Started Guide](docs/GETTING_STARTED.md)** - First steps and basic usage
223
+ - **[Models Overview](docs/MODELS.md)** - Available models and their capabilities
224
+
225
+ ### Advanced Features
226
+ - **[Loras Guide](docs/LORAS.md)** - Using and managing Loras for customization
227
+ - **[Finetunes](docs/FINETUNES.md)** - Add manually new models to WanGP
228
+ - **[VACE ControlNet](docs/VACE.md)** - Advanced video control and manipulation
229
+ - **[Command Line Reference](docs/CLI.md)** - All available command line options
230
+
231
+ ## 📚 Documentation
232
+
233
+ - **[Changelog](docs/CHANGELOG.md)** - Latest updates and version history
234
+ - **[Troubleshooting](docs/TROUBLESHOOTING.md)** - Common issues and solutions
235
+
236
+ ## 📚 Video Guides
237
+ - Nice Video that explain how to use Vace:\
238
+ https://www.youtube.com/watch?v=FMo9oN2EAvE
239
+ - Another Vace guide:\
240
+ https://www.youtube.com/watch?v=T5jNiEhf9xk
241
+
242
+ ## 🔗 Related Projects
243
+
244
+ ### Other Models for the GPU Poor
245
+ - **[HuanyuanVideoGP](https://github.com/deepbeepmeep/HunyuanVideoGP)** - One of the best open source Text to Video generators
246
+ - **[Hunyuan3D-2GP](https://github.com/deepbeepmeep/Hunyuan3D-2GP)** - Image to 3D and text to 3D tool
247
+ - **[FluxFillGP](https://github.com/deepbeepmeep/FluxFillGP)** - Inpainting/outpainting tools based on Flux
248
+ - **[Cosmos1GP](https://github.com/deepbeepmeep/Cosmos1GP)** - Text to world generator and image/video to world
249
+ - **[OminiControlGP](https://github.com/deepbeepmeep/OminiControlGP)** - Flux-derived application for object transfer
250
+ - **[YuE GP](https://github.com/deepbeepmeep/YuEGP)** - Song generator with instruments and singer's voice
251
+
252
+ ---
253
+
254
+ <p align="center">
255
+ Made with ❤️ by DeepBeepMeep
256
+ </p>
defaults/ReadMe.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Please do not modify any file in this Folder.
2
+
3
+ If you want to change a property of a default model, copy the corresponding model file in the ./finetunes folder and modify the properties you want to change in the new file.
4
+ If a property is not in the new file, it will be inherited automatically from the default file that matches the same name file.
5
+
6
+ For instance to hide a model:
7
+
8
+ {
9
+ "model":
10
+ {
11
+ "visible": false
12
+ }
13
+ }
defaults/alpha.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model":
3
+ {
4
+ "name": "Wan2.1 Alpha 14B",
5
+ "architecture" : "alpha",
6
+ "description": "This model successfully generates various scenes with accurate and clearly rendered transparency. Notably, it can synthesize diverse semi-transparent objects, glowing effects, and fine-grained details such as hair. For each video generated you will find a Zip file with the same name that will contain the corresponding RGBA images.",
7
+ "URLs": "t2v",
8
+ "preload_URLs": [
9
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan_alpha_2.1_vae_rgb_channel.safetensors",
10
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan_alpha_2.1_vae_alpha_channel.safetensors"
11
+ ],
12
+ "loras": [
13
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan_alpha_2.1_dora.safetensors"
14
+ ],
15
+ "loras_multipliers": [ 1 ]
16
+ },
17
+ "prompt": "A large orange octopus is seen resting. The background of the video is transparent."
18
+
19
+ }
defaults/alpha_sf.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model":
3
+ {
4
+ "name": "Wan2.1 Alpha Lightning 14B",
5
+ "architecture" : "alpha",
6
+ "description": "This model is accelerated by the Lightning / SelfForcing process. It successfully generates various scenes with accurate and clearly rendered transparency. Notably, it can synthesize diverse semi-transparent objects, glowing effects, and fine-grained details such as hair. For each video generated you will find a Zip file with the same name that will contain the corresponding RGBA images.",
7
+ "URLs": "t2v_sf",
8
+ "preload_URLs": "alpha",
9
+ "loras": "alpha",
10
+ "loras_multipliers": [ 1 ],
11
+ "profiles_dir" : [""]
12
+ },
13
+ "prompt": "A large orange octopus is seen resting. The background of the video is transparent.",
14
+ "num_inference_steps": 4,
15
+ "guidance_scale": 1,
16
+ "flow_shift": 3
17
+ }
defaults/animate.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Wan2.2 Animate 14B",
4
+ "architecture": "animate",
5
+ "description": "Wan-Animate takes a video and a character image as input, and generates a video in either 'Animation' or 'Replacement' mode. Sliding Window of 81 frames at least are recommeded to obtain the best Style continuity.",
6
+ "URLs": [
7
+ "https://huggingface.co/DeepBeepMeep/Wan2.2/resolve/main/wan2.2_animate_14B_bf16.safetensors",
8
+ "https://huggingface.co/DeepBeepMeep/Wan2.2/resolve/main/wan2.2_animate_14B_quanto_fp16_int8.safetensors",
9
+ "https://huggingface.co/DeepBeepMeep/Wan2.2/resolve/main/wan2.2_animate_14B_quanto_bf16_int8.safetensors"
10
+ ],
11
+ "preload_URLs" :
12
+ [
13
+ "https://huggingface.co/DeepBeepMeep/Wan2.2/resolve/main/wan2.2_animate_relighting_lora.safetensors"
14
+ ],
15
+ "group": "wan2_2"
16
+ }
17
+ }
defaults/chatterbox.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "TTS Chatterbox Multilingual",
4
+ "architecture": "chatterbox",
5
+ "description": "Resemble AI's open multilingual TTS with language selection via model mode.",
6
+ "URLs": []
7
+ },
8
+ "prompt": "Welcome to Chatterbox !",
9
+ "negative_prompt": "",
10
+ "audio_prompt_type": "A",
11
+ "model_mode": "en",
12
+ "repeat_generation": 1,
13
+ "video_length": 0,
14
+ "num_inference_steps": 0,
15
+ "pace": 0.5,
16
+ "exaggeration": 0.5,
17
+ "temperature": 0.8
18
+ }
defaults/fantasy.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model":
3
+ {
4
+ "name": "Fantasy Talking 720p 14B",
5
+ "architecture" : "fantasy",
6
+ "modules": [ ["https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_fantasy_speaking_14B_bf16.safetensors"]],
7
+ "description": "The Fantasy Talking model corresponds to the original Wan image 2 video model combined with the Fantasy Speaking module to process an audio Input.",
8
+ "URLs": "i2v_720p"
9
+ },
10
+ "resolution": "1280x720"
11
+ }
defaults/flf2v_720p.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model":
3
+ {
4
+ "name": "First Last Frame to Video 720p (FLF2V) 14B",
5
+ "architecture" : "flf2v_720p",
6
+ "visible" : true,
7
+ "description": "The First Last Frame 2 Video model is the official model Image 2 Video model that supports Start and End frames.",
8
+ "URLs": [
9
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_FLF2V_720p_14B_mbf16.safetensors",
10
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_FLF2V_720p_14B_quanto_mbf16_int8.safetensors",
11
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_FLF2V_720p_14B_quanto_mfp16_int8.safetensors"
12
+ ],
13
+ "auto_quantize": true
14
+ },
15
+ "resolution": "1280x720"
16
+ }
defaults/flux.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Flux 1 Dev 12B",
4
+ "architecture": "flux",
5
+ "description": "FLUX.1 Dev is a 12 billion parameter rectified flow transformer capable of generating images from text descriptions.",
6
+ "URLs": [
7
+ "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-dev_bf16.safetensors",
8
+ "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-dev_quanto_bf16_int8.safetensors"
9
+ ],
10
+ "image_outputs": true
11
+ },
12
+ "prompt": "draw a hat",
13
+ "resolution": "1280x720",
14
+ "batch_size": 1
15
+ }
defaults/flux_chroma.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Flux 1 Chroma 1 HD 8.9B",
4
+ "architecture": "flux_chroma",
5
+ "description": "FLUX.1 Chroma is a 8.9 billion parameters model. As a base model, Chroma1 is intentionally designed to be an excellent starting point for finetuning. It provides a strong, neutral foundation for developers, researchers, and artists to create specialized models..",
6
+ "URLs": [
7
+ "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-chroma_hd_bf16.safetensors",
8
+ "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-chroma_hd_quanto_bf16_int8.safetensors"
9
+ ],
10
+ "image_outputs": true
11
+ },
12
+ "prompt": "draw a hat",
13
+ "resolution": "1280x720",
14
+ "guidance_scale": 3.0,
15
+ "num_inference_steps": 20,
16
+ "batch_size": 1
17
+ }
defaults/flux_dev_kontext.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Flux 1 Dev Kontext 12B",
4
+ "architecture": "flux_dev_kontext",
5
+ "description": "FLUX.1 Kontext is a 12 billion parameter rectified flow transformer capable of editing images based on instructions stored in the Prompt. Please be aware that Flux Kontext is picky on the resolution of the input image and the output dimensions may not match the dimensions of the input image.",
6
+ "URLs": [
7
+ "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1_kontext_dev_bf16.safetensors",
8
+ "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1_kontext_dev_quanto_bf16_int8.safetensors"
9
+ ]
10
+ },
11
+ "prompt": "add a hat",
12
+ "resolution": "1280x720",
13
+ "batch_size": 1
14
+ }
15
+
16
+
defaults/flux_dev_kontext_dreamomni2.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Flux 1 DreamOmni2 12B",
4
+ "architecture": "flux_dev_kontext_dreamomni2",
5
+ "description": "DreamOmni2 is a Multimodal Instruction-based Editing and Generation Model",
6
+ "URLs": [
7
+ "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1_kontext_dev_bf16.safetensors",
8
+ "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1_kontext_dev_quanto_bf16_int8.safetensors"
9
+ ],
10
+ "preload_URLs": [ "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux_dreamomni2_edit_lora.safetensors",
11
+ "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux_dreamomni2_gen_lora.safetensors"
12
+ ]
13
+ },
14
+ "prompt": "In the scene, the character from the first image stands on the left, and the character from the second image stands on the right. They are shaking hands against the backdrop of a spaceship interior.",
15
+ "resolution": "1280x720",
16
+ "batch_size": 1
17
+ }
18
+
19
+
defaults/flux_dev_umo.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Flux 1 UMO Dev 12B",
4
+ "architecture": "flux_dev_umo",
5
+ "description": "FLUX.1 UMO Dev is a model that can Edit Images with a specialization in combining multiple image references (resized internally at 512x512 max) to produce an Image output. Best Image preservation at 768x768 Resolution Output.",
6
+ "URLs": "flux",
7
+ "loras": ["https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-dev-UMO_dit_lora_bf16.safetensors"],
8
+ "resolutions": [ ["1024x1024 (1:1)", "1024x1024"],
9
+ ["768x1024 (3:4)", "768x1024"],
10
+ ["1024x768 (4:3)", "1024x768"],
11
+ ["512x1024 (1:2)", "512x1024"],
12
+ ["1024x512 (2:1)", "1024x512"],
13
+ ["768x768 (1:1)", "768x768"],
14
+ ["768x512 (3:2)", "768x512"],
15
+ ["512x768 (2:3)", "512x768"]]
16
+ },
17
+ "prompt": "the man is wearing a hat",
18
+ "embedded_guidance_scale": 4,
19
+ "resolution": "768x768",
20
+ "batch_size": 1
21
+ }
22
+
23
+
defaults/flux_dev_uso.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Flux 1 USO Dev 12B",
4
+ "architecture": "flux_dev_uso",
5
+ "description": "FLUX.1 USO Dev is a model that can Edit Images with a specialization in Style Transfers (up to two).",
6
+ "modules": [ ["https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-dev-USO_projector_bf16.safetensors"]],
7
+ "URLs": "flux",
8
+ "loras": ["https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-dev-USO_dit_lora_bf16.safetensors"]
9
+ },
10
+ "prompt": "the man is wearing a hat",
11
+ "embedded_guidance_scale": 4,
12
+ "resolution": "1024x1024",
13
+ "batch_size": 1
14
+ }
15
+
16
+
defaults/flux_krea.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Flux 1 Dev Krea 12B",
4
+ "architecture": "flux",
5
+ "description": "Cutting-edge output quality, with a focus on aesthetic photography..",
6
+ "URLs": [
7
+ "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-krea-dev_bf16.safetensors",
8
+ "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-krea-dev_quanto_bf16_int8.safetensors"
9
+ ],
10
+ "image_outputs": true
11
+ },
12
+ "prompt": "draw a hat",
13
+ "resolution": "1280x720",
14
+ "batch_size": 1
15
+ }
defaults/flux_schnell.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Flux 1 Schnell 12B",
4
+ "architecture": "flux_schnell",
5
+ "description": "FLUX.1 Schnell is a 12 billion parameter rectified flow transformer capable of generating images from text descriptions. As a distilled model it requires fewer denoising steps.",
6
+ "URLs": [
7
+ "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-schnell_bf16.safetensors",
8
+ "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-schnell_quanto_bf16_int8.safetensors"
9
+ ],
10
+ "image_outputs": true
11
+ },
12
+ "prompt": "draw a hat",
13
+ "resolution": "1280x720",
14
+ "num_inference_steps": 10,
15
+ "batch_size": 1
16
+ }
defaults/flux_srpo.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Flux 1 Dev SRPO 12B",
4
+ "architecture": "flux",
5
+ "description": "By fine-tuning the FLUX.1.dev model with optimized denoising and online reward adjustment, SRPO improves its human-evaluated realism and aesthetic quality by over 3x.",
6
+ "URLs": [
7
+ "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-srpo-dev_bf16.safetensors",
8
+ "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-srpo-dev_quanto_bf16_int8.safetensors"
9
+ ]
10
+ },
11
+ "prompt": "draw a hat",
12
+ "resolution": "1024x1024",
13
+ "batch_size": 1
14
+ }
defaults/flux_srpo_uso.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Flux 1 USO SRPO 12B",
4
+ "architecture": "flux_dev_uso",
5
+ "description": "FLUX.1 USO SRPO is a model that can Edit Images with a specialization in Style Transfers (up to two). It leverages the improved Image quality brought by the SRPO process",
6
+ "modules": [ "flux_dev_uso"],
7
+ "URLs": "flux_srpo",
8
+ "loras": "flux_dev_uso"
9
+ },
10
+ "prompt": "the man is wearing a hat",
11
+ "embedded_guidance_scale": 4,
12
+ "resolution": "1024x1024",
13
+ "batch_size": 1
14
+ }
15
+
16
+
defaults/fun_inp.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model":
3
+ {
4
+ "name": "Fun InP image2video 14B",
5
+ "architecture" : "fun_inp",
6
+ "description": "The Fun model is an alternative image 2 video that supports out the box End Image fixing (contrary to the original Wan image 2 video model).",
7
+ "URLs": [
8
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_Fun_InP_14B_bf16.safetensors",
9
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_Fun_InP_14B_quanto_int8.safetensors",
10
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_Fun_InP_14B_quanto_fp16_int8.safetensors"
11
+ ]
12
+ }
13
+ }
defaults/fun_inp_1.3B.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model":
3
+ {
4
+ "name": "Fun InP image2video 1.3B",
5
+ "architecture" : "fun_inp_1.3B",
6
+ "description": "The Fun model is an alternative image 2 video that supports out the box End Image fixing (contrary to the original Wan image 2 video model). The 1.3B adds also image 2 to video capability to the 1.3B model.",
7
+ "URLs": [
8
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_Fun_InP_1.3B_bf16.safetensors"
9
+ ]
10
+ }
11
+ }
defaults/hunyuan.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model":
3
+ {
4
+ "name": "Hunyuan Video Text2video 720p 13B",
5
+ "architecture" : "hunyuan",
6
+ "description": "Probably the best text 2 video model available.",
7
+ "URLs": [
8
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_720_bf16.safetensors",
9
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_720_quanto_int8.safetensors"
10
+ ]
11
+ }
12
+ }
defaults/hunyuan_avatar.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model":
3
+ {
4
+ "name": "Hunyuan Video Avatar 720p 13B",
5
+ "architecture" : "hunyuan_avatar",
6
+ "description": "With the Hunyuan Video Avatar model you can animate a person based on the content of an audio input. Please note that the video generator works by processing 128 frames segment at a time (even if you ask less). The good news is that it will concatenate multiple segments for long video generation (max 3 segments recommended as the quality will get worse).",
7
+ "URLs": [
8
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_avatar_720_bf16.safetensors",
9
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_avatar_720_quanto_bf16_int8.safetensors"
10
+ ]
11
+ }
12
+ }
defaults/hunyuan_custom.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model":
3
+ {
4
+ "name": "Hunyuan Video Custom 720p 13B",
5
+ "architecture" : "hunyuan_custom",
6
+ "description": "The Hunyuan Video Custom model is probably the best model to transfer people (only people for the moment) as it is quite good to keep their identity. However it is slow as to get good results, you need to generate 720p videos with 30 steps.",
7
+ "URLs": [
8
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_custom_720_bf16.safetensors",
9
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_custom_720_quanto_bf16_int8.safetensors"
10
+ ]
11
+ }
12
+ }
defaults/hunyuan_custom_audio.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model":
3
+ {
4
+ "name": "Hunyuan Video Custom Audio 720p 13B",
5
+ "architecture" : "hunyuan_custom_audio",
6
+ "description": "The Hunyuan Video Custom Audio model can be used to generate scenes of a person speaking given a Reference Image and a Recorded Voice or Song. The reference image is not a start image and therefore one can represent the person in a different context.The video length can be anything up to 10s. It is also quite good to generate no sound Video based on a person.",
7
+ "URLs": [
8
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_custom_audio_720_bf16.safetensors",
9
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_custom_audio_720_quanto_bf16_int8.safetensors"
10
+ ]
11
+ }
12
+ }
defaults/hunyuan_custom_edit.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model":
3
+ {
4
+ "name": "Hunyuan Video Custom Edit 720p 13B",
5
+ "architecture" : "hunyuan_custom_edit",
6
+ "description": "The Hunyuan Video Custom Edit model can be used to do Video inpainting on a person (add accessories or completely replace the person). You will need in any case to define a Video Mask which will indicate which area of the Video should be edited.",
7
+ "URLs": [
8
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_custom_edit_720_bf16.safetensors",
9
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_custom_edit_720_quanto_bf16_int8.safetensors"
10
+ ]
11
+ }
12
+ }
defaults/hunyuan_i2v.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model":
3
+ {
4
+ "name": "Hunyuan Video Image2video 720p 13B",
5
+ "architecture" : "hunyuan_i2v",
6
+ "description": "A good looking image 2 video model, but not so good in prompt adherence.",
7
+ "URLs": [
8
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_i2v_720_bf16v2.safetensors",
9
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_i2v_720_quanto_int8v2.safetensors"
10
+ ]
11
+ }
12
+ }
defaults/hunyuan_t2v_accvideo.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Hunyuan Video Text2video 720p AccVideo 13B",
4
+ "architecture": "hunyuan",
5
+ "description": " AccVideo is a novel efficient distillation method to accelerate video diffusion models with synthetic datset. Our method is 8.5x faster than HunyuanVideo.",
6
+ "URLs": [
7
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/accvideo_hunyuan_video_720_quanto_int8.safetensors"
8
+ ],
9
+ "preload_URLs": [
10
+ ],
11
+ "auto_quantize": true
12
+ },
13
+ "negative_prompt": "",
14
+ "resolution": "832x480",
15
+ "video_length": 81,
16
+ "seed": 42,
17
+ "num_inference_steps": 5,
18
+ "flow_shift": 7,
19
+ "embedded_guidance_scale": 6,
20
+ "repeat_generation": 1,
21
+ "loras_multipliers": "",
22
+ "temporal_upsampling": "",
23
+ "spatial_upsampling": "",
24
+ "RIFLEx_setting": 0,
25
+ "slg_start_perc": 10,
26
+ "slg_end_perc": 90,
27
+ "prompt_enhancer": "",
28
+ "activated_loras": [
29
+ ]
30
+ }
defaults/hunyuan_t2v_fast.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Hunyuan Video Text2video 720p FastHunyuan 13B",
4
+ "architecture": "hunyuan",
5
+ "description": "Fast Hunyuan is an accelerated HunyuanVideo model. It can sample high quality videos with 6 diffusion steps.",
6
+ "settings_dir": [ "" ],
7
+ "URLs": [
8
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/fast_hunyuan_video_720_quanto_int8.safetensors"
9
+ ],
10
+ "preload_URLs": [
11
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/fast_hunyuan_video_720_quanto_int8_map.json"
12
+ ],
13
+ "auto_quantize": true
14
+ },
15
+ "negative_prompt": "",
16
+ "resolution": "832x480",
17
+ "video_length": 81,
18
+ "seed": 42,
19
+ "num_inference_steps": 6,
20
+ "flow_shift": 17,
21
+ "embedded_guidance_scale": 6,
22
+ "repeat_generation": 1,
23
+ "loras_multipliers": "",
24
+ "temporal_upsampling": "",
25
+ "spatial_upsampling": "",
26
+ "RIFLEx_setting": 0,
27
+ "slg_start_perc": 10,
28
+ "slg_end_perc": 90,
29
+ "prompt_enhancer": "",
30
+ "activated_loras": [
31
+ ]
32
+ }
defaults/i2v.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model":
3
+ {
4
+ "name": "Wan2.1 Image2video 480p 14B",
5
+ "architecture" : "i2v",
6
+ "description": "The standard Wan Image 2 Video specialized to generate 480p images. It also offers Start and End Image support (End Image is not supported in the original model but seems to work well)",
7
+ "URLs": [
8
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_image2video_480p_14B_mbf16.safetensors",
9
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_image2video_480p_14B_quanto_mbf16_int8.safetensors",
10
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_image2video_480p_14B_quanto_mfp16_int8.safetensors"
11
+ ]
12
+ }
13
+ }
defaults/i2v_2_2.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model":
3
+ {
4
+ "name": "Wan2.2 Image2video 14B",
5
+ "architecture" : "i2v_2_2",
6
+ "description": "Wan 2.2 Image 2 Video model. Contrary to the Wan Image2video 2.1 this model is structurally close to the t2v model. You will need consequently to store Loras for this model in the t2v Lora Folder.",
7
+ "URLs": [
8
+ "https://huggingface.co/DeepBeepMeep/Wan2.2/resolve/main/wan2.2_image2video_14B_high_mbf16.safetensors",
9
+ "https://huggingface.co/DeepBeepMeep/Wan2.2/resolve/main/wan2.2_image2video_14B_high_quanto_mbf16_int8.safetensors",
10
+ "https://huggingface.co/DeepBeepMeep/Wan2.2/resolve/main/wan2.2_image2video_14B_high_quanto_mfp16_int8.safetensors"
11
+ ],
12
+ "URLs2": [
13
+ "https://huggingface.co/DeepBeepMeep/Wan2.2/resolve/main/wan2.2_image2video_14B_low_mbf16.safetensors",
14
+ "https://huggingface.co/DeepBeepMeep/Wan2.2/resolve/main/wan2.2_image2video_14B_low_quanto_mbf16_int8.safetensors",
15
+ "https://huggingface.co/DeepBeepMeep/Wan2.2/resolve/main/wan2.2_image2video_14B_low_quanto_mfp16_int8.safetensors"
16
+ ],
17
+ "group": "wan2_2"
18
+ },
19
+ "guidance_phases": 2,
20
+ "switch_threshold" : 900,
21
+ "guidance_scale" : 3.5,
22
+ "guidance2_scale" : 3.5,
23
+ "flow_shift" : 5
24
+
25
+ }
defaults/i2v_2_2_multitalk.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model":
3
+ {
4
+ "name": "Wan2.2 Multitalk 14B",
5
+ "architecture" : "i2v_2_2_multitalk",
6
+ "description": "The Multitalk module of Wan 2.1 has been combined with the Wan 2.2 image 2 video. It lets you have up to two people have a conversation.",
7
+ "modules": ["multitalk"],
8
+ "URLs": "i2v_2_2",
9
+ "URLs2": "i2v_2_2",
10
+ "group": "wan2_2",
11
+ "visible": false
12
+ },
13
+ "switch_threshold" : 900,
14
+ "guidance_scale" : 3.5,
15
+ "guidance2_scale" : 3.5,
16
+ "flow_shift" : 5
17
+
18
+ }
defaults/i2v_720p.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model":
3
+ {
4
+ "name": "Wan2.1 Image2video 720p 14B",
5
+ "architecture" : "i2v",
6
+ "description": "The standard Wan Image 2 Video specialized to generate 720p images. It also offers Start and End Image support (End Image is not supported in the original model but seems to work well).",
7
+ "URLs": [
8
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_image2video_720p_14B_mbf16.safetensors",
9
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_image2video_720p_14B_quanto_mbf16_int8.safetensors",
10
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_image2video_720p_14B_quanto_mfp16_int8.safetensors"
11
+ ]
12
+ },
13
+ "resolution": "1280x720"
14
+ }
defaults/i2v_fusionix.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model":
3
+ {
4
+ "name": "Wan2.1 Image2video 480p FusioniX 14B",
5
+ "architecture" : "i2v",
6
+ "description": "A powerful merged image-to-video model based on the original WAN 2.1 I2V model, enhanced using multiple open-source components and LoRAs to boost motion realism, temporal consistency, and expressive detail.",
7
+ "URLs": "i2v",
8
+ "settings_dir": [ "" ],
9
+ "loras": ["https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/loras_accelerators/Wan2.1_I2V_14B_FusionX_LoRA.safetensors"]
10
+ }
11
+ }
defaults/infinitetalk.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Infinitetalk Single Speaker 480p 14B",
4
+ "architecture": "infinitetalk",
5
+ "modules": [
6
+ [
7
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_infinitetalk_single_14B_mbf16.safetensors",
8
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_infinitetalk_single_14B_quanto_mbf16_int8.safetensors",
9
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_infinitetalk_single_14B_quanto_mfp16_int8.safetensors"
10
+ ]
11
+ ],
12
+ "description": "The Infinitetalk model is an improved version of Multitalk that supports very long videos. This is the single speaker version. Sliding Window size must be 81 frames to get smooth transitions between shots.",
13
+ "one_speaker_only": true,
14
+ "URLs": "i2v"
15
+ }
16
+ }
defaults/infinitetalk_multi.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Infinitetalk Multi Speakers 480p 14B",
4
+ "architecture": "infinitetalk",
5
+ "modules": [
6
+ [
7
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_infinitetalk_multi_14B_mbf16.safetensors",
8
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_infinitetalk_multi_14B_quanto_mfp16_int8.safetensors",
9
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_infinitetalk_multi_14B_quanto_mbf16_int8.safetensors"
10
+ ]
11
+ ],
12
+ "description": "The Infinitetalk model is an improved version of Multitalk that supports very long videos. This is the multi speakers version.Sliding Window size must be 81 frames to get smooth transitions between shots",
13
+ "multi_speakers_only": true,
14
+ "URLs": "i2v"
15
+ }
16
+ }
defaults/ltxv_13B.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model":
3
+ {
4
+ "name": "LTX Video 0.9.8 13B",
5
+ "architecture" : "ltxv_13B",
6
+ "description": "LTX Video is a fast model that can be used to generate very very long videos (up to 1800 frames !).It is recommended to keep the number of steps to 30 or you will need to update the file 'ltxv_video/configs/ltxv-13b-0.9.8-dev.yaml'.The LTX Video model expects very long prompts, so don't hesitate to use the Prompt Enhancer.",
7
+ "URLs": [
8
+ "https://huggingface.co/DeepBeepMeep/LTX_Video/resolve/main/ltxv_0.9.8_13B_dev_bf16.safetensors",
9
+ "https://huggingface.co/DeepBeepMeep/LTX_Video/resolve/main/ltxv_0.9.8_13B_dev_quanto_bf16_int8.safetensors"
10
+ ],
11
+ "preload_URLs" : [
12
+ "https://huggingface.co/DeepBeepMeep/LTX_Video/resolve/main/ltxv-097-ic-lora-pose-control-diffusers.safetensors",
13
+ "https://huggingface.co/DeepBeepMeep/LTX_Video/resolve/main/ltxv-097-ic-lora-depth-control-diffusers.safetensors",
14
+ "https://huggingface.co/DeepBeepMeep/LTX_Video/resolve/main/ltxv-097-ic-lora-canny-control-diffusers.safetensors"
15
+ ],
16
+ "LTXV_config": "models/ltx_video/configs/ltxv-13b-0.9.8-dev.yaml"
17
+ },
18
+ "num_inference_steps": 30
19
+ }
defaults/ltxv_distilled.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model":
3
+ {
4
+ "name": "LTX Video 0.9.8 Distilled 13B",
5
+ "architecture" : "ltxv_13B",
6
+ "description": "LTX Video is a fast model that can be used to generate very long videos (up to 1800 frames !).This distilled version is a very fast version and retains a high level of quality. The LTX Video model expects very long prompts, so don't hesitate to use the Prompt Enhancer.",
7
+ "URLs": [
8
+ "https://huggingface.co/DeepBeepMeep/LTX_Video/resolve/main/ltxv_0.9.8_13B_distilled_bf16.safetensors",
9
+ "https://huggingface.co/DeepBeepMeep/LTX_Video/resolve/main/ltxv_0.9.8_13B_distilled_quanto_bf16_int8.safetensors"
10
+ ],
11
+ "preload_URLs" : "ltxv_13B",
12
+ "LTXV_config": "models/ltx_video/configs/ltxv-13b-0.9.8-distilled.yaml"
13
+ },
14
+ "num_inference_steps": 6
15
+ }
defaults/lucy_edit.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Wan2.2 Lucy Edit 5B",
4
+ "architecture": "lucy_edit",
5
+ "description": "Lucy Edit is a video editing model that performs instruction-guided edits on videos using free-text prompts. It supports a variety of edits, such as clothing & accessory changes, character changes, object insertions, and scene replacements while preserving the motion and composition perfectly.",
6
+ "URLs": [
7
+ "https://huggingface.co/DeepBeepMeep/Wan2.2/resolve/main/wan2.2_lucy_edit_mbf16.safetensors",
8
+ "https://huggingface.co/DeepBeepMeep/Wan2.2/resolve/main/wan2.2_lucy_edit_quanto_mbf16_int8.safetensors",
9
+ "https://huggingface.co/DeepBeepMeep/Wan2.2/resolve/main/wan2.2_lucy_edit_quanto_mfp16_int8.safetensors"
10
+ ],
11
+ "settings_dir": "ti2v_2_2",
12
+ "group": "wan2_2"
13
+ },
14
+ "prompt": "change the clothes to red",
15
+ "video_length": 81,
16
+ "guidance_scale": 5,
17
+ "flow_shift": 5,
18
+ "num_inference_steps": 30,
19
+ "resolution": "1280x720"
20
+ }
defaults/lucy_edit_fastwan.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Wan2.2 Lucy Edit FastWan 5B",
4
+ "architecture": "lucy_edit",
5
+ "description": "Lucy Edit is a video editing model that performs instruction-guided edits on videos using free-text prompts. It supports a variety of edits, such as clothing & accessory changes, character changes, object insertions, and scene replacements while preserving the motion and composition perfectly. This is the FastWan version for faster generation.",
6
+ "URLs": "lucy_edit",
7
+ "group": "wan2_2",
8
+ "settings_dir": [ "" ],
9
+ "loras": "ti2v_2_2_fastwan"
10
+ },
11
+ "prompt": "change the clothes to red",
12
+ "video_length": 81,
13
+ "guidance_scale": 1,
14
+ "flow_shift": 3,
15
+ "num_inference_steps": 5,
16
+ "resolution": "1280x720"
17
+ }
defaults/lynx.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Wan2.1 Lynx 14B",
4
+ "modules": [
5
+ [
6
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_lynx_full_module_14B_bf16.safetensors",
7
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_lynx_full_module_14B_quanto_bf16_int8.safetensors",
8
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_lynx_full_module_14B_quanto_fp16_int8.safetensors"
9
+ ]
10
+ ],
11
+ "architecture": "lynx",
12
+ "description": "The Lynx ControlNet offers State of the Art Identity Preservation. You need to provide a Reference Image which is a close up of a person face to transfer this person in the Video.",
13
+ "URLs": "t2v",
14
+ "preload_URLs": [
15
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_lynx_full_arc_resampler.safetensors"
16
+ ]
17
+ }
18
+ }
defaults/moviigen.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model":
3
+ {
4
+ "name": "MoviiGen 1080p 14B",
5
+ "architecture" : "t2v",
6
+ "description": "MoviiGen 1.1, a cutting-edge video generation model that excels in cinematic aesthetics and visual quality. Use it to generate videos in 720p or 1080p in the 21:9 ratio.",
7
+ "URLs": [
8
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_moviigen1.1_14B_mbf16.safetensors",
9
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_moviigen1.1_14B_quanto_mbf16_int8.safetensors",
10
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_moviigen1.1_14B_quanto_mfp16_int8.safetensors"
11
+ ],
12
+ "auto_quantize": true
13
+ },
14
+ "resolution": "1280x720",
15
+ "video_length": 81
16
+ }
defaults/multitalk.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model":
3
+ {
4
+ "name": "Multitalk 480p 14B",
5
+ "architecture" : "multitalk",
6
+ "modules": [
7
+ ["https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_multitalk_14B_mbf16.safetensors",
8
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_multitalk_14B_quanto_mbf16_int8.safetensors",
9
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_multitalk_14B_quanto_mfp16_int8.safetensors"]
10
+ ],
11
+ "description": "The Multitalk model corresponds to the original Wan image 2 video model combined with the Multitalk module. It lets you have up to two people have a conversation.",
12
+ "URLs": "i2v",
13
+ "teacache_coefficients" : [-3.02331670e+02, 2.23948934e+02, -5.25463970e+01, 5.87348440e+00, -2.01973289e-01]
14
+ }
15
+ }
defaults/multitalk_720p.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model":
3
+ {
4
+ "name": "Multitalk 720p 14B",
5
+ "architecture" : "multitalk",
6
+ "modules": ["multitalk"],
7
+ "description": "The Multitalk model corresponds to the original Wan image 2 video 720p model combined with the Multitalk module. It lets you have up to two people have a conversation.",
8
+ "URLs": "i2v_720p",
9
+ "teacache_coefficients" : [-114.36346466, 65.26524496, -18.82220707, 4.91518089, -0.23412683],
10
+ "auto_quantize": true
11
+ },
12
+ "resolution": "1280x720"
13
+ }
defaults/ovi.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Wan2.2 Ovi 10B",
4
+ "architecture": "ovi",
5
+ "description": "Ovi will generate an Audio soundtrack with the Video. It is specialized in speaking characters. Use the tags &lt;S&gt; and &lt;E&gt; to delimit the speaker words and &lt;AUDCAP&gt; and &lt;ENDAUDCAP&gt; to set the background noise.",
6
+ "URLs": [
7
+ "https://huggingface.co/DeepBeepMeep/Wan2.2/resolve/main/wan2.2_ovi_video_10B_bf16.safetensors",
8
+ "https://huggingface.co/DeepBeepMeep/Wan2.2/resolve/main/wan2.2_ovi_video_10B_quanto_bf16_int8.safetensors"
9
+ ],
10
+ "URLs2": [
11
+ "https://huggingface.co/DeepBeepMeep/Wan2.2/resolve/main/wan2.2_ovi_audio_10B_bf16.safetensors",
12
+ "https://huggingface.co/DeepBeepMeep/Wan2.2/resolve/main/wan2.2_ovi_audio_10B_quanto_bf16_int8.safetensors"
13
+ ]
14
+ },
15
+ "num_inference_steps": 30,
16
+ "prompt" : "A singer in a glittering jacket grips the microphone, sweat shining on his brow, and shouts, <S>The end is night<E>. The crowd roars in response, fists in the air. Behind him, a guitarist steps to the mic and adds to say <S>We must all find a bunker where to hide.<E>. The energy peaks as the lights flare brighter.. <AUDCAP>Electric guitar riffs, cheering crowd, shouted male voices.<ENDAUDCAP>"
17
+
18
+ }
defaults/ovi_fastwan.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "Wan2.2 Ovi FastWan 10B",
4
+ "architecture": "ovi",
5
+ "description": "Ovi will generate an Audio soundtrack with the Video. It is specialized in speaking characters. Use the tags &lt;S&gt; and &lt;E&gt; to delimit the speaker words and &lt;AUDCAP&gt; and &lt;ENDAUDCAP&gt; to set the background noise. This version of Ovi is accelerated by FastWan",
6
+ "URLs": "ovi",
7
+ "URLs2": "ovi",
8
+ "loras": ["https://huggingface.co/DeepBeepMeep/Wan2.2/resolve/main/loras_accelerators/Wan2_2_5B_FastWanFullAttn_lora_rank_128_bf16.safetensors"],
9
+ "loras_multipliers": [1]
10
+ },
11
+ "num_inference_steps": 6,
12
+ "prompt" : "A singer in a glittering jacket grips the microphone, sweat shining on his brow, and shouts, <S>The end is night<E>. The crowd roars in response, fists in the air. Behind him, a guitarist steps to the mic and adds to say <S>We must all find a bunker where to hide.<E>. The energy peaks as the lights flare brighter.. <AUDCAP>Electric guitar riffs, cheering crowd, shouted male voices.<ENDAUDCAP>",
13
+ "guidance_scale": 1,
14
+ "audio_guidance_scale": 1,
15
+ "flow_shift": 6,
16
+ "num_inference_steps": 6
17
+ }
defaults/phantom_1.3B.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model":
3
+ {
4
+ "name": "Phantom 1.3B",
5
+ "architecture" : "phantom_1.3B",
6
+ "description": "The Phantom model is specialized in transferring people or objects of your choice into a generated Video. It produces very nice results when used at 720p.",
7
+ "URLs": [
8
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2_1_phantom_1.3B_mbf16.safetensors"
9
+ ]
10
+ }
11
+ }