diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..8049651d2658ff02f940bb7c84cc46be2b69dee4
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,47 @@
+.*
+*.py[cod]
+# *.jpg
+*.jpeg
+# *.png
+*.gif
+*.bmp
+*.mp4
+*.mov
+*.mkv
+*.log
+*.zip
+*.pt
+*.pth
+*.ckpt
+*.safetensors
+#*.json
+# *.txt
+*.backup
+*.pkl
+*.html
+*.pdf
+*.whl
+*.exe
+cache
+__pycache__/
+storage/
+samples/
+!.gitignore
+!requirements.txt
+.DS_Store
+*DS_Store
+google/
+Wan2.1-T2V-14B/
+Wan2.1-T2V-1.3B/
+Wan2.1-I2V-14B-480P/
+Wan2.1-I2V-14B-720P/
+outputs/
+outputs2/
+gradio_outputs/
+ckpts/
+loras/
+loras_i2v/
+
+settings/
+
+wgp_config.json
diff --git a/Custom Resolutions Instructions.txt b/Custom Resolutions Instructions.txt
new file mode 100644
index 0000000000000000000000000000000000000000..c11f25dc3d29d2142b1cb4254e9bc7562ec1835e
--- /dev/null
+++ b/Custom Resolutions Instructions.txt
@@ -0,0 +1,16 @@
+You can override the choice of Resolutions offered by WanGP by creating a file "resolutions.json" in the main WanGP folder.
+This file is composed of a list of two-element sublists. Each two-element sublist should have the format ["Label", "WxH"], where W and H are respectively the Width and Height of the resolution. Please make sure that W and H are multiples of 16. The letter "x" should be placed in between these two dimensions.
+
+Here is below a sample "resolutions.json" file :
+
+[
+ ["1280x720 (16:9, 720p)", "1280x720"],
+ ["720x1280 (9:16, 720p)", "720x1280"],
+ ["1024x1024 (1:1, 720p)", "1024x1024"],
+ ["1280x544 (21:9, 720p)", "1280x544"],
+ ["544x1280 (9:21, 720p)", "544x1280"],
+ ["1104x832 (4:3, 720p)", "1104x832"],
+ ["832x1104 (3:4, 720p)", "832x1104"],
+ ["960x960 (1:1, 720p)", "960x960"],
+ ["832x480 (16:9, 480p)", "832x480"]
+]
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..927c579fd9240970de1a43a049ca0d29411cfecf
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,92 @@
+FROM nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04
+
+# Build arg for GPU architectures - specify which CUDA compute capabilities to compile for
+# Common values:
+# 7.0 - Tesla V100
+# 7.5 - RTX 2060, 2070, 2080, Titan RTX
+# 8.0 - A100, A800 (Ampere data center)
+# 8.6 - RTX 3060, 3070, 3080, 3090 (Ampere consumer)
+# 8.9 - RTX 4070, 4080, 4090 (Ada Lovelace)
+# 9.0 - H100, H800 (Hopper data center)
+# 12.0 - RTX 5070, 5080, 5090 (Blackwell) - Note: sm_120 architecture
+#
+# Examples:
+# RTX 3060: --build-arg CUDA_ARCHITECTURES="8.6"
+# RTX 4090: --build-arg CUDA_ARCHITECTURES="8.9"
+# Multiple: --build-arg CUDA_ARCHITECTURES="8.0;8.6;8.9"
+#
+# Note: Including 8.9 or 9.0 may cause compilation issues on some setups
+# Default includes 8.0 and 8.6 for broad Ampere compatibility
+ARG CUDA_ARCHITECTURES="8.0;8.6"
+
+ENV DEBIAN_FRONTEND=noninteractive
+
+# Install system dependencies
+RUN apt update && \
+ apt install -y \
+ python3 python3-pip git wget curl cmake ninja-build \
+ libgl1 libglib2.0-0 ffmpeg && \
+ apt clean
+
+WORKDIR /workspace
+
+COPY requirements.txt .
+
+# Upgrade pip first
+RUN pip install --upgrade pip setuptools wheel
+
+# Install requirements if exists
+RUN pip install -r requirements.txt
+
+# Install PyTorch with CUDA support
+RUN pip install --extra-index-url https://download.pytorch.org/whl/cu124 \
+ torch==2.6.0+cu124 torchvision==0.21.0+cu124
+
+# Install SageAttention from git (patch GPU detection)
+ENV TORCH_CUDA_ARCH_LIST="${CUDA_ARCHITECTURES}"
+ENV FORCE_CUDA="1"
+ENV MAX_JOBS="1"
+
+COPY <
+Made with ❤️ by DeepBeepMeep +
diff --git a/defaults/ReadMe.txt b/defaults/ReadMe.txt new file mode 100644 index 0000000000000000000000000000000000000000..c98ee2ec959c9fca2bf66d3f5d63a91bc4f5c337 --- /dev/null +++ b/defaults/ReadMe.txt @@ -0,0 +1,13 @@ +Please dot not modify any file in this Folder. + +If you want to change a property of a default model, copy the corrresponding model file in the ./finetunes folder and modify the properties you want to change in the new file. +If a property is not in the new file, it will be inherited automatically from the default file that matches the same name file. + +For instance to hide a model: + +{ + "model": + { + "visible": false + } +} diff --git a/defaults/alpha.json b/defaults/alpha.json new file mode 100644 index 0000000000000000000000000000000000000000..641fbde35db5c231421fdfdcbc817a20b60e0c0c --- /dev/null +++ b/defaults/alpha.json @@ -0,0 +1,19 @@ +{ + "model": + { + "name": "Wan2.1 Alpha 14B", + "architecture" : "alpha", + "description": "This model successfully generates various scenes with accurate and clearly rendered transparency. Notably, it can synthesize diverse semi-transparent objects, glowing effects, and fine-grained details such as hair. For each video generated you will find a Zip file with the same name that will contain the corresponding RGBA images.", + "URLs": "t2v", + "preload_URLs": [ + "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan_alpha_2.1_vae_rgb_channel.safetensors", + "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan_alpha_2.1_vae_alpha_channel.safetensors" + ], + "loras": [ + "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan_alpha_2.1_dora.safetensors" + ], + "loras_multipliers": [ 1 ] + }, + "prompt": "A large orange octopus is seen resting. The background of the video is transparent." 
+ +} \ No newline at end of file diff --git a/defaults/alpha_sf.json b/defaults/alpha_sf.json new file mode 100644 index 0000000000000000000000000000000000000000..4d273b92b808d64b440f7b41fc8393846bf0525a --- /dev/null +++ b/defaults/alpha_sf.json @@ -0,0 +1,17 @@ +{ + "model": + { + "name": "Wan2.1 Alpha Lightning 14B", + "architecture" : "alpha", + "description": "This model is accelerated by the Lightning / SelfForcing process. It successfully generates various scenes with accurate and clearly rendered transparency. Notably, it can synthesize diverse semi-transparent objects, glowing effects, and fine-grained details such as hair. For each video generated you will find a Zip file with the same name that will contain the corresponding RGBA images.", + "URLs": "t2v_sf", + "preload_URLs": "alpha", + "loras": "alpha", + "loras_multipliers": [ 1 ], + "profiles_dir" : [""] + }, + "prompt": "A large orange octopus is seen resting. The background of the video is transparent.", + "num_inference_steps": 4, + "guidance_scale": 1, + "flow_shift": 3 +} \ No newline at end of file diff --git a/defaults/animate.json b/defaults/animate.json new file mode 100644 index 0000000000000000000000000000000000000000..bdcb6fefd14e4d2d2b98345c9b91e465c8c461bf --- /dev/null +++ b/defaults/animate.json @@ -0,0 +1,17 @@ +{ + "model": { + "name": "Wan2.2 Animate 14B", + "architecture": "animate", + "description": "Wan-Animate takes a video and a character image as input, and generates a video in either 'Animation' or 'Replacement' mode. 
Sliding Window of 81 frames at least are recommeded to obtain the best Style continuity.", + "URLs": [ + "https://huggingface.co/DeepBeepMeep/Wan2.2/resolve/main/wan2.2_animate_14B_bf16.safetensors", + "https://huggingface.co/DeepBeepMeep/Wan2.2/resolve/main/wan2.2_animate_14B_quanto_fp16_int8.safetensors", + "https://huggingface.co/DeepBeepMeep/Wan2.2/resolve/main/wan2.2_animate_14B_quanto_bf16_int8.safetensors" + ], + "preload_URLs" : + [ + "https://huggingface.co/DeepBeepMeep/Wan2.2/resolve/main/wan2.2_animate_relighting_lora.safetensors" + ], + "group": "wan2_2" + } +} \ No newline at end of file diff --git a/defaults/chatterbox.json b/defaults/chatterbox.json new file mode 100644 index 0000000000000000000000000000000000000000..1030f8759bd892978d017606f6b605dd182e155d --- /dev/null +++ b/defaults/chatterbox.json @@ -0,0 +1,18 @@ +{ + "model": { + "name": "TTS Chatterbox Multilingual", + "architecture": "chatterbox", + "description": "Resemble AI's open multilingual TTS with language selection via model mode.", + "URLs": [] + }, + "prompt": "Welcome to Chatterbox !", + "negative_prompt": "", + "audio_prompt_type": "A", + "model_mode": "en", + "repeat_generation": 1, + "video_length": 0, + "num_inference_steps": 0, + "pace": 0.5, + "exaggeration": 0.5, + "temperature": 0.8 +} diff --git a/defaults/fantasy.json b/defaults/fantasy.json new file mode 100644 index 0000000000000000000000000000000000000000..fc09cee9fd76acc378d7c3829a6f34eed7bc8ff2 --- /dev/null +++ b/defaults/fantasy.json @@ -0,0 +1,11 @@ +{ + "model": + { + "name": "Fantasy Talking 720p 14B", + "architecture" : "fantasy", + "modules": [ ["https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_fantasy_speaking_14B_bf16.safetensors"]], + "description": "The Fantasy Talking model corresponds to the original Wan image 2 video model combined with the Fantasy Speaking module to process an audio Input.", + "URLs": "i2v_720p" + }, + "resolution": "1280x720" +} diff --git a/defaults/flf2v_720p.json 
b/defaults/flf2v_720p.json new file mode 100644 index 0000000000000000000000000000000000000000..b25c4387a2904774d54ae26095560d0d429ee38a --- /dev/null +++ b/defaults/flf2v_720p.json @@ -0,0 +1,16 @@ +{ + "model": + { + "name": "First Last Frame to Video 720p (FLF2V) 14B", + "architecture" : "flf2v_720p", + "visible" : true, + "description": "The First Last Frame 2 Video model is the official model Image 2 Video model that supports Start and End frames.", + "URLs": [ + "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_FLF2V_720p_14B_mbf16.safetensors", + "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_FLF2V_720p_14B_quanto_mbf16_int8.safetensors", + "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_FLF2V_720p_14B_quanto_mfp16_int8.safetensors" + ], + "auto_quantize": true + }, + "resolution": "1280x720" +} \ No newline at end of file diff --git a/defaults/flux.json b/defaults/flux.json new file mode 100644 index 0000000000000000000000000000000000000000..724ec1abb1efc27fd78956a82e8a2cc2f40aaf52 --- /dev/null +++ b/defaults/flux.json @@ -0,0 +1,15 @@ +{ + "model": { + "name": "Flux 1 Dev 12B", + "architecture": "flux", + "description": "FLUX.1 Dev is a 12 billion parameter rectified flow transformer capable of generating images from text descriptions.", + "URLs": [ + "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-dev_bf16.safetensors", + "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-dev_quanto_bf16_int8.safetensors" + ], + "image_outputs": true + }, + "prompt": "draw a hat", + "resolution": "1280x720", + "batch_size": 1 +} \ No newline at end of file diff --git a/defaults/flux_chroma.json b/defaults/flux_chroma.json new file mode 100644 index 0000000000000000000000000000000000000000..ebb8076be67fca39c6123124abc1a85548717e7d --- /dev/null +++ b/defaults/flux_chroma.json @@ -0,0 +1,17 @@ +{ + "model": { + "name": "Flux 1 Chroma 1 HD 8.9B", + "architecture": "flux_chroma", + "description": "FLUX.1 
Chroma is a 8.9 billion parameters model. As a base model, Chroma1 is intentionally designed to be an excellent starting point for finetuning. It provides a strong, neutral foundation for developers, researchers, and artists to create specialized models..", + "URLs": [ + "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-chroma_hd_bf16.safetensors", + "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-chroma_hd_quanto_bf16_int8.safetensors" + ], + "image_outputs": true + }, + "prompt": "draw a hat", + "resolution": "1280x720", + "guidance_scale": 3.0, + "num_inference_steps": 20, + "batch_size": 1 +} \ No newline at end of file diff --git a/defaults/flux_dev_kontext.json b/defaults/flux_dev_kontext.json new file mode 100644 index 0000000000000000000000000000000000000000..cf3631a1f51d7c2a88b7e0d860220ced0afd2cb6 --- /dev/null +++ b/defaults/flux_dev_kontext.json @@ -0,0 +1,16 @@ +{ + "model": { + "name": "Flux 1 Dev Kontext 12B", + "architecture": "flux_dev_kontext", + "description": "FLUX.1 Kontext is a 12 billion parameter rectified flow transformer capable of editing images based on instructions stored in the Prompt. 
Please be aware that Flux Kontext is picky on the resolution of the input image and the output dimensions may not match the dimensions of the input image.", + "URLs": [ + "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1_kontext_dev_bf16.safetensors", + "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1_kontext_dev_quanto_bf16_int8.safetensors" + ] + }, + "prompt": "add a hat", + "resolution": "1280x720", + "batch_size": 1 +} + + \ No newline at end of file diff --git a/defaults/flux_dev_kontext_dreamomni2.json b/defaults/flux_dev_kontext_dreamomni2.json new file mode 100644 index 0000000000000000000000000000000000000000..75b0fcbac2aaca7627ad9398f9006ffd11acb3e9 --- /dev/null +++ b/defaults/flux_dev_kontext_dreamomni2.json @@ -0,0 +1,19 @@ +{ + "model": { + "name": "Flux 1 DreamOmni2 12B", + "architecture": "flux_dev_kontext_dreamomni2", + "description": "DreamOmni2 is a Multimodal Instruction-based Editing and Generation Model", + "URLs": [ + "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1_kontext_dev_bf16.safetensors", + "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1_kontext_dev_quanto_bf16_int8.safetensors" + ], + "preload_URLs": [ "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux_dreamomni2_edit_lora.safetensors", + "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux_dreamomni2_gen_lora.safetensors" + ] + }, + "prompt": "In the scene, the character from the first image stands on the left, and the character from the second image stands on the right. 
They are shaking hands against the backdrop of a spaceship interior.", + "resolution": "1280x720", + "batch_size": 1 +} + + \ No newline at end of file diff --git a/defaults/flux_dev_umo.json b/defaults/flux_dev_umo.json new file mode 100644 index 0000000000000000000000000000000000000000..a5e8e7d922e776a51cd7e73f61b30d12c369d083 --- /dev/null +++ b/defaults/flux_dev_umo.json @@ -0,0 +1,23 @@ +{ + "model": { + "name": "Flux 1 UMO Dev 12B", + "architecture": "flux_dev_umo", + "description": "FLUX.1 UMO Dev is a model that can Edit Images with a specialization in combining multiple image references (resized internally at 512x512 max) to produce an Image output. Best Image preservation at 768x768 Resolution Output.", + "URLs": "flux", + "loras": ["https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-dev-UMO_dit_lora_bf16.safetensors"], + "resolutions": [ ["1024x1024 (1:1)", "1024x1024"], + ["768x1024 (3:4)", "768x1024"], + ["1024x768 (4:3)", "1024x768"], + ["512x1024 (1:2)", "512x1024"], + ["1024x512 (2:1)", "1024x512"], + ["768x768 (1:1)", "768x768"], + ["768x512 (3:2)", "768x512"], + ["512x768 (2:3)", "512x768"]] + }, + "prompt": "the man is wearing a hat", + "embedded_guidance_scale": 4, + "resolution": "768x768", + "batch_size": 1 +} + + \ No newline at end of file diff --git a/defaults/flux_dev_uso.json b/defaults/flux_dev_uso.json new file mode 100644 index 0000000000000000000000000000000000000000..4b429210ef50820cd84a0aa6ef572cf713c218d3 --- /dev/null +++ b/defaults/flux_dev_uso.json @@ -0,0 +1,16 @@ +{ + "model": { + "name": "Flux 1 USO Dev 12B", + "architecture": "flux_dev_uso", + "description": "FLUX.1 USO Dev is a model that can Edit Images with a specialization in Style Transfers (up to two).", + "modules": [ ["https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-dev-USO_projector_bf16.safetensors"]], + "URLs": "flux", + "loras": ["https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-dev-USO_dit_lora_bf16.safetensors"] + }, + "prompt": 
"the man is wearing a hat", + "embedded_guidance_scale": 4, + "resolution": "1024x1024", + "batch_size": 1 +} + + \ No newline at end of file diff --git a/defaults/flux_krea.json b/defaults/flux_krea.json new file mode 100644 index 0000000000000000000000000000000000000000..669e1a595de29ea4d2bc6679f3e62ddf1b8fbb1c --- /dev/null +++ b/defaults/flux_krea.json @@ -0,0 +1,15 @@ +{ + "model": { + "name": "Flux 1 Dev Krea 12B", + "architecture": "flux", + "description": "Cutting-edge output quality, with a focus on aesthetic photography..", + "URLs": [ + "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-krea-dev_bf16.safetensors", + "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-krea-dev_quanto_bf16_int8.safetensors" + ], + "image_outputs": true + }, + "prompt": "draw a hat", + "resolution": "1280x720", + "batch_size": 1 +} \ No newline at end of file diff --git a/defaults/flux_schnell.json b/defaults/flux_schnell.json new file mode 100644 index 0000000000000000000000000000000000000000..1645a86ab56251a5bdb39bce24befc327ac0eaac --- /dev/null +++ b/defaults/flux_schnell.json @@ -0,0 +1,16 @@ +{ + "model": { + "name": "Flux 1 Schnell 12B", + "architecture": "flux_schnell", + "description": "FLUX.1 Schnell is a 12 billion parameter rectified flow transformer capable of generating images from text descriptions. 
As a distilled model it requires fewer denoising steps.", + "URLs": [ + "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-schnell_bf16.safetensors", + "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-schnell_quanto_bf16_int8.safetensors" + ], + "image_outputs": true + }, + "prompt": "draw a hat", + "resolution": "1280x720", + "num_inference_steps": 10, + "batch_size": 1 +} \ No newline at end of file diff --git a/defaults/flux_srpo.json b/defaults/flux_srpo.json new file mode 100644 index 0000000000000000000000000000000000000000..8b1c447e98ba63575615be70289f2117d18f7821 --- /dev/null +++ b/defaults/flux_srpo.json @@ -0,0 +1,14 @@ +{ + "model": { + "name": "Flux 1 Dev SRPO 12B", + "architecture": "flux", + "description": "By fine-tuning the FLUX.1.dev model with optimized denoising and online reward adjustment, SRPO improves its human-evaluated realism and aesthetic quality by over 3x.", + "URLs": [ + "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-srpo-dev_bf16.safetensors", + "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-srpo-dev_quanto_bf16_int8.safetensors" + ] + }, + "prompt": "draw a hat", + "resolution": "1024x1024", + "batch_size": 1 +} \ No newline at end of file diff --git a/defaults/flux_srpo_uso.json b/defaults/flux_srpo_uso.json new file mode 100644 index 0000000000000000000000000000000000000000..eed67bd36d97f4c151021360753f500003d30371 --- /dev/null +++ b/defaults/flux_srpo_uso.json @@ -0,0 +1,16 @@ +{ + "model": { + "name": "Flux 1 USO SRPO 12B", + "architecture": "flux_dev_uso", + "description": "FLUX.1 USO SRPO is a model that can Edit Images with a specialization in Style Transfers (up to two). 
It leverages the improved Image quality brought by the SRPO process", + "modules": [ "flux_dev_uso"], + "URLs": "flux_srpo", + "loras": "flux_dev_uso" + }, + "prompt": "the man is wearing a hat", + "embedded_guidance_scale": 4, + "resolution": "1024x1024", + "batch_size": 1 +} + + \ No newline at end of file diff --git a/defaults/fun_inp.json b/defaults/fun_inp.json new file mode 100644 index 0000000000000000000000000000000000000000..65330cd128661c6271705697997bd9780a93617c --- /dev/null +++ b/defaults/fun_inp.json @@ -0,0 +1,13 @@ +{ + "model": + { + "name": "Fun InP image2video 14B", + "architecture" : "fun_inp", + "description": "The Fun model is an alternative image 2 video that supports out the box End Image fixing (contrary to the original Wan image 2 video model).", + "URLs": [ + "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_Fun_InP_14B_bf16.safetensors", + "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_Fun_InP_14B_quanto_int8.safetensors", + "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_Fun_InP_14B_quanto_fp16_int8.safetensors" + ] + } +} diff --git a/defaults/fun_inp_1.3B.json b/defaults/fun_inp_1.3B.json new file mode 100644 index 0000000000000000000000000000000000000000..9d60e63e081c129f1744e8700d279d417de5d705 --- /dev/null +++ b/defaults/fun_inp_1.3B.json @@ -0,0 +1,11 @@ +{ + "model": + { + "name": "Fun InP image2video 1.3B", + "architecture" : "fun_inp_1.3B", + "description": "The Fun model is an alternative image 2 video that supports out the box End Image fixing (contrary to the original Wan image 2 video model). 
The 1.3B adds also image 2 to video capability to the 1.3B model.", + "URLs": [ + "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_Fun_InP_1.3B_bf16.safetensors" + ] + } +} \ No newline at end of file diff --git a/defaults/hunyuan.json b/defaults/hunyuan.json new file mode 100644 index 0000000000000000000000000000000000000000..a6ba832b82cbeb9974c983436ad2adb614643124 --- /dev/null +++ b/defaults/hunyuan.json @@ -0,0 +1,12 @@ +{ + "model": + { + "name": "Hunyuan Video Text2video 720p 13B", + "architecture" : "hunyuan", + "description": "Probably the best text 2 video model available.", + "URLs": [ + "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_720_bf16.safetensors", + "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_720_quanto_int8.safetensors" + ] + } +} \ No newline at end of file diff --git a/defaults/hunyuan_avatar.json b/defaults/hunyuan_avatar.json new file mode 100644 index 0000000000000000000000000000000000000000..d01c318fde0702b7e81f2d7478df3d260592ceb4 --- /dev/null +++ b/defaults/hunyuan_avatar.json @@ -0,0 +1,12 @@ +{ + "model": + { + "name": "Hunyuan Video Avatar 720p 13B", + "architecture" : "hunyuan_avatar", + "description": "With the Hunyuan Video Avatar model you can animate a person based on the content of an audio input. Please note that the video generator works by processing 128 frames segment at a time (even if you ask less). 
The good news is that it will concatenate multiple segments for long video generation (max 3 segments recommended as the quality will get worse).", + "URLs": [ + "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_avatar_720_bf16.safetensors", + "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_avatar_720_quanto_bf16_int8.safetensors" + ] + } +} \ No newline at end of file diff --git a/defaults/hunyuan_custom.json b/defaults/hunyuan_custom.json new file mode 100644 index 0000000000000000000000000000000000000000..d6217e9f5c6fdb2bef0a16f9fe9de6a18afa7563 --- /dev/null +++ b/defaults/hunyuan_custom.json @@ -0,0 +1,12 @@ +{ + "model": + { + "name": "Hunyuan Video Custom 720p 13B", + "architecture" : "hunyuan_custom", + "description": "The Hunyuan Video Custom model is probably the best model to transfer people (only people for the moment) as it is quite good to keep their identity. However it is slow as to get good results, you need to generate 720p videos with 30 steps.", + "URLs": [ + "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_custom_720_bf16.safetensors", + "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_custom_720_quanto_bf16_int8.safetensors" + ] + } +} \ No newline at end of file diff --git a/defaults/hunyuan_custom_audio.json b/defaults/hunyuan_custom_audio.json new file mode 100644 index 0000000000000000000000000000000000000000..f5c4d52345d24b83f83cb0c503965d064e50356e --- /dev/null +++ b/defaults/hunyuan_custom_audio.json @@ -0,0 +1,12 @@ +{ + "model": + { + "name": "Hunyuan Video Custom Audio 720p 13B", + "architecture" : "hunyuan_custom_audio", + "description": "The Hunyuan Video Custom Audio model can be used to generate scenes of a person speaking given a Reference Image and a Recorded Voice or Song. 
The reference image is not a start image and therefore one can represent the person in a different context.The video length can be anything up to 10s. It is also quite good to generate no sound Video based on a person.", + "URLs": [ + "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_custom_audio_720_bf16.safetensors", + "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_custom_audio_720_quanto_bf16_int8.safetensors" + ] + } +} \ No newline at end of file diff --git a/defaults/hunyuan_custom_edit.json b/defaults/hunyuan_custom_edit.json new file mode 100644 index 0000000000000000000000000000000000000000..9cf037e7eb1e927293488da57f2d2dcee51af1dd --- /dev/null +++ b/defaults/hunyuan_custom_edit.json @@ -0,0 +1,12 @@ +{ + "model": + { + "name": "Hunyuan Video Custom Edit 720p 13B", + "architecture" : "hunyuan_custom_edit", + "description": "The Hunyuan Video Custom Edit model can be used to do Video inpainting on a person (add accessories or completely replace the person). 
You will need in any case to define a Video Mask which will indicate which area of the Video should be edited.", + "URLs": [ + "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_custom_edit_720_bf16.safetensors", + "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_custom_edit_720_quanto_bf16_int8.safetensors" + ] + } +} \ No newline at end of file diff --git a/defaults/hunyuan_i2v.json b/defaults/hunyuan_i2v.json new file mode 100644 index 0000000000000000000000000000000000000000..44722da6b4445c79a7349eab72ff6681c62f1be7 --- /dev/null +++ b/defaults/hunyuan_i2v.json @@ -0,0 +1,12 @@ +{ + "model": + { + "name": "Hunyuan Video Image2video 720p 13B", + "architecture" : "hunyuan_i2v", + "description": "A good looking image 2 video model, but not so good in prompt adherence.", + "URLs": [ + "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_i2v_720_bf16v2.safetensors", + "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_i2v_720_quanto_int8v2.safetensors" + ] + } +} \ No newline at end of file diff --git a/defaults/hunyuan_t2v_accvideo.json b/defaults/hunyuan_t2v_accvideo.json new file mode 100644 index 0000000000000000000000000000000000000000..2da984a8da95167ef76a4f82d3ca219ff72f9972 --- /dev/null +++ b/defaults/hunyuan_t2v_accvideo.json @@ -0,0 +1,30 @@ +{ + "model": { + "name": "Hunyuan Video Text2video 720p AccVideo 13B", + "architecture": "hunyuan", + "description": " AccVideo is a novel efficient distillation method to accelerate video diffusion models with synthetic datset. 
Our method is 8.5x faster than HunyuanVideo.", + "URLs": [ + "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/accvideo_hunyuan_video_720_quanto_int8.safetensors" + ], + "preload_URLs": [ + ], + "auto_quantize": true + }, + "negative_prompt": "", + "resolution": "832x480", + "video_length": 81, + "seed": 42, + "num_inference_steps": 5, + "flow_shift": 7, + "embedded_guidance_scale": 6, + "repeat_generation": 1, + "loras_multipliers": "", + "temporal_upsampling": "", + "spatial_upsampling": "", + "RIFLEx_setting": 0, + "slg_start_perc": 10, + "slg_end_perc": 90, + "prompt_enhancer": "", + "activated_loras": [ + ] +} \ No newline at end of file diff --git a/defaults/hunyuan_t2v_fast.json b/defaults/hunyuan_t2v_fast.json new file mode 100644 index 0000000000000000000000000000000000000000..4019e24ef7c0b32a02ab0b232c4abc75c5a40ec7 --- /dev/null +++ b/defaults/hunyuan_t2v_fast.json @@ -0,0 +1,32 @@ +{ + "model": { + "name": "Hunyuan Video Text2video 720p FastHunyuan 13B", + "architecture": "hunyuan", + "description": "Fast Hunyuan is an accelerated HunyuanVideo model. 
It can sample high quality videos with 6 diffusion steps.", + "settings_dir": [ "" ], + "URLs": [ + "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/fast_hunyuan_video_720_quanto_int8.safetensors" + ], + "preload_URLs": [ + "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/fast_hunyuan_video_720_quanto_int8_map.json" + ], + "auto_quantize": true + }, + "negative_prompt": "", + "resolution": "832x480", + "video_length": 81, + "seed": 42, + "num_inference_steps": 6, + "flow_shift": 17, + "embedded_guidance_scale": 6, + "repeat_generation": 1, + "loras_multipliers": "", + "temporal_upsampling": "", + "spatial_upsampling": "", + "RIFLEx_setting": 0, + "slg_start_perc": 10, + "slg_end_perc": 90, + "prompt_enhancer": "", + "activated_loras": [ + ] +} \ No newline at end of file diff --git a/defaults/i2v.json b/defaults/i2v.json new file mode 100644 index 0000000000000000000000000000000000000000..ba10691483c09a0ed34ff8769ad429ae182fb18b --- /dev/null +++ b/defaults/i2v.json @@ -0,0 +1,13 @@ +{ + "model": + { + "name": "Wan2.1 Image2video 480p 14B", + "architecture" : "i2v", + "description": "The standard Wan Image 2 Video specialized to generate 480p images. 
It also offers Start and End Image support (End Image is not supported in the original model but seems to work well)", + "URLs": [ + "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_image2video_480p_14B_mbf16.safetensors", + "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_image2video_480p_14B_quanto_mbf16_int8.safetensors", + "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_image2video_480p_14B_quanto_mfp16_int8.safetensors" + ] + } +} \ No newline at end of file diff --git a/defaults/i2v_2_2.json b/defaults/i2v_2_2.json new file mode 100644 index 0000000000000000000000000000000000000000..a032333eb44e54d1095eaf11887c558aa84cb923 --- /dev/null +++ b/defaults/i2v_2_2.json @@ -0,0 +1,25 @@ +{ + "model": + { + "name": "Wan2.2 Image2video 14B", + "architecture" : "i2v_2_2", + "description": "Wan 2.2 Image 2 Video model. Contrary to the Wan Image2video 2.1 this model is structurally close to the t2v model. You will need consequently to store Loras for this model in the t2v Lora Folder.", + "URLs": [ + "https://huggingface.co/DeepBeepMeep/Wan2.2/resolve/main/wan2.2_image2video_14B_high_mbf16.safetensors", + "https://huggingface.co/DeepBeepMeep/Wan2.2/resolve/main/wan2.2_image2video_14B_high_quanto_mbf16_int8.safetensors", + "https://huggingface.co/DeepBeepMeep/Wan2.2/resolve/main/wan2.2_image2video_14B_high_quanto_mfp16_int8.safetensors" + ], + "URLs2": [ + "https://huggingface.co/DeepBeepMeep/Wan2.2/resolve/main/wan2.2_image2video_14B_low_mbf16.safetensors", + "https://huggingface.co/DeepBeepMeep/Wan2.2/resolve/main/wan2.2_image2video_14B_low_quanto_mbf16_int8.safetensors", + "https://huggingface.co/DeepBeepMeep/Wan2.2/resolve/main/wan2.2_image2video_14B_low_quanto_mfp16_int8.safetensors" + ], + "group": "wan2_2" + }, + "guidance_phases": 2, + "switch_threshold" : 900, + "guidance_scale" : 3.5, + "guidance2_scale" : 3.5, + "flow_shift" : 5 + +} \ No newline at end of file diff --git a/defaults/i2v_2_2_multitalk.json 
b/defaults/i2v_2_2_multitalk.json new file mode 100644 index 0000000000000000000000000000000000000000..9326469ffb2300a0a4427a971cba14ba9c0d543f --- /dev/null +++ b/defaults/i2v_2_2_multitalk.json @@ -0,0 +1,18 @@ +{ + "model": + { + "name": "Wan2.2 Multitalk 14B", + "architecture" : "i2v_2_2_multitalk", + "description": "The Multitalk module of Wan 2.1 has been combined with the Wan 2.2 image 2 video. It lets you have up to two people have a conversation.", + "modules": ["multitalk"], + "URLs": "i2v_2_2", + "URLs2": "i2v_2_2", + "group": "wan2_2", + "visible": false + }, + "switch_threshold" : 900, + "guidance_scale" : 3.5, + "guidance2_scale" : 3.5, + "flow_shift" : 5 + +} \ No newline at end of file diff --git a/defaults/i2v_720p.json b/defaults/i2v_720p.json new file mode 100644 index 0000000000000000000000000000000000000000..844aab9884efe22aaeb1c9b4aa1b38dc656e5098 --- /dev/null +++ b/defaults/i2v_720p.json @@ -0,0 +1,14 @@ +{ + "model": + { + "name": "Wan2.1 Image2video 720p 14B", + "architecture" : "i2v", + "description": "The standard Wan Image 2 Video specialized to generate 720p images. 
It also offers Start and End Image support (End Image is not supported in the original model but seems to work well).", + "URLs": [ + "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_image2video_720p_14B_mbf16.safetensors", + "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_image2video_720p_14B_quanto_mbf16_int8.safetensors", + "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_image2video_720p_14B_quanto_mfp16_int8.safetensors" + ] + }, + "resolution": "1280x720" +} \ No newline at end of file diff --git a/defaults/i2v_fusionix.json b/defaults/i2v_fusionix.json new file mode 100644 index 0000000000000000000000000000000000000000..8b0a8af548e1b7c3db5eab10a599d509efbc6b19 --- /dev/null +++ b/defaults/i2v_fusionix.json @@ -0,0 +1,11 @@ +{ + "model": + { + "name": "Wan2.1 Image2video 480p FusioniX 14B", + "architecture" : "i2v", + "description": "A powerful merged image-to-video model based on the original WAN 2.1 I2V model, enhanced using multiple open-source components and LoRAs to boost motion realism, temporal consistency, and expressive detail.", + "URLs": "i2v", + "settings_dir": [ "" ], + "loras": ["https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/loras_accelerators/Wan2.1_I2V_14B_FusionX_LoRA.safetensors"] + } +} \ No newline at end of file diff --git a/defaults/infinitetalk.json b/defaults/infinitetalk.json new file mode 100644 index 0000000000000000000000000000000000000000..fc28d96e5e1021ac1e9102fe5048401305106f70 --- /dev/null +++ b/defaults/infinitetalk.json @@ -0,0 +1,16 @@ +{ + "model": { + "name": "Infinitetalk Single Speaker 480p 14B", + "architecture": "infinitetalk", + "modules": [ + [ + "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_infinitetalk_single_14B_mbf16.safetensors", + "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_infinitetalk_single_14B_quanto_mbf16_int8.safetensors", + 
"https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_infinitetalk_single_14B_quanto_mfp16_int8.safetensors" + ] + ], + "description": "The Infinitetalk model is an improved version of Multitalk that supports very long videos. This is the single speaker version. Sliding Window size must be 81 frames to get smooth transitions between shots.", + "one_speaker_only": true, + "URLs": "i2v" + } +} \ No newline at end of file diff --git a/defaults/infinitetalk_multi.json b/defaults/infinitetalk_multi.json new file mode 100644 index 0000000000000000000000000000000000000000..229ecc778bd7495f5575f3ca02ee7f0c5da2d0ef --- /dev/null +++ b/defaults/infinitetalk_multi.json @@ -0,0 +1,16 @@ +{ + "model": { + "name": "Infinitetalk Multi Speakers 480p 14B", + "architecture": "infinitetalk", + "modules": [ + [ + "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_infinitetalk_multi_14B_mbf16.safetensors", + "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_infinitetalk_multi_14B_quanto_mfp16_int8.safetensors", + "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_infinitetalk_multi_14B_quanto_mbf16_int8.safetensors" + ] + ], + "description": "The Infinitetalk model is an improved version of Multitalk that supports very long videos. 
This is the multi-speaker version. Sliding Window size must be 81 frames to get smooth transitions between shots.", + "multi_speakers_only": true, + "URLs": "i2v" + } +} \ No newline at end of file diff --git a/defaults/ltxv_13B.json b/defaults/ltxv_13B.json new file mode 100644 index 0000000000000000000000000000000000000000..639442e1aa2989d86a3d1574357d0cb3348afe18 --- /dev/null +++ b/defaults/ltxv_13B.json @@ -0,0 +1,19 @@ +{ + "model": + { + "name": "LTX Video 0.9.8 13B", + "architecture" : "ltxv_13B", + "description": "LTX Video is a fast model that can be used to generate very very long videos (up to 1800 frames!). It is recommended to keep the number of steps to 30 or you will need to update the file 'ltxv_video/configs/ltxv-13b-0.9.8-dev.yaml'. The LTX Video model expects very long prompts, so don't hesitate to use the Prompt Enhancer.", + "URLs": [ + "https://huggingface.co/DeepBeepMeep/LTX_Video/resolve/main/ltxv_0.9.8_13B_dev_bf16.safetensors", + "https://huggingface.co/DeepBeepMeep/LTX_Video/resolve/main/ltxv_0.9.8_13B_dev_quanto_bf16_int8.safetensors" + ], + "preload_URLs" : [ + "https://huggingface.co/DeepBeepMeep/LTX_Video/resolve/main/ltxv-097-ic-lora-pose-control-diffusers.safetensors", + "https://huggingface.co/DeepBeepMeep/LTX_Video/resolve/main/ltxv-097-ic-lora-depth-control-diffusers.safetensors", + "https://huggingface.co/DeepBeepMeep/LTX_Video/resolve/main/ltxv-097-ic-lora-canny-control-diffusers.safetensors" + ], + "LTXV_config": "models/ltx_video/configs/ltxv-13b-0.9.8-dev.yaml" + }, + "num_inference_steps": 30 +} diff --git a/defaults/ltxv_distilled.json b/defaults/ltxv_distilled.json new file mode 100644 index 0000000000000000000000000000000000000000..c570057289f03596ccc9fffe547f7ce7d407680b --- /dev/null +++ b/defaults/ltxv_distilled.json @@ -0,0 +1,15 @@ +{ + "model": + { + "name": "LTX Video 0.9.8 Distilled 13B", + "architecture" : "ltxv_13B", + "description": "LTX Video is a fast model that can be used to generate very long videos (up to 
1800 frames!). This distilled version is very fast and retains a high level of quality. The LTX Video model expects very long prompts, so don't hesitate to use the Prompt Enhancer.", + "URLs": [ + "https://huggingface.co/DeepBeepMeep/LTX_Video/resolve/main/ltxv_0.9.8_13B_distilled_bf16.safetensors", + "https://huggingface.co/DeepBeepMeep/LTX_Video/resolve/main/ltxv_0.9.8_13B_distilled_quanto_bf16_int8.safetensors" + ], + "preload_URLs" : "ltxv_13B", + "LTXV_config": "models/ltx_video/configs/ltxv-13b-0.9.8-distilled.yaml" + }, + "num_inference_steps": 6 +} diff --git a/defaults/lucy_edit.json b/defaults/lucy_edit.json new file mode 100644 index 0000000000000000000000000000000000000000..57d3d958ab6f82da6f32dba5cbcc1e60df4c473c --- /dev/null +++ b/defaults/lucy_edit.json @@ -0,0 +1,20 @@ +{ + "model": { + "name": "Wan2.2 Lucy Edit 5B", + "architecture": "lucy_edit", + "description": "Lucy Edit is a video editing model that performs instruction-guided edits on videos using free-text prompts. 
It supports a variety of edits, such as clothing & accessory changes, character changes, object insertions, and scene replacements while preserving the motion and composition perfectly.", + "URLs": [ + "https://huggingface.co/DeepBeepMeep/Wan2.2/resolve/main/wan2.2_lucy_edit_mbf16.safetensors", + "https://huggingface.co/DeepBeepMeep/Wan2.2/resolve/main/wan2.2_lucy_edit_quanto_mbf16_int8.safetensors", + "https://huggingface.co/DeepBeepMeep/Wan2.2/resolve/main/wan2.2_lucy_edit_quanto_mfp16_int8.safetensors" + ], + "settings_dir": "ti2v_2_2", + "group": "wan2_2" + }, + "prompt": "change the clothes to red", + "video_length": 81, + "guidance_scale": 5, + "flow_shift": 5, + "num_inference_steps": 30, + "resolution": "1280x720" +} \ No newline at end of file diff --git a/defaults/lucy_edit_fastwan.json b/defaults/lucy_edit_fastwan.json new file mode 100644 index 0000000000000000000000000000000000000000..de2830c9fb6fe92ae78ba77e673ab9c02025e51d --- /dev/null +++ b/defaults/lucy_edit_fastwan.json @@ -0,0 +1,17 @@ +{ + "model": { + "name": "Wan2.2 Lucy Edit FastWan 5B", + "architecture": "lucy_edit", + "description": "Lucy Edit is a video editing model that performs instruction-guided edits on videos using free-text prompts. It supports a variety of edits, such as clothing & accessory changes, character changes, object insertions, and scene replacements while preserving the motion and composition perfectly. 
This is the FastWan version for faster generation.", + "URLs": "lucy_edit", + "group": "wan2_2", + "settings_dir": [ "" ], + "loras": "ti2v_2_2_fastwan" + }, + "prompt": "change the clothes to red", + "video_length": 81, + "guidance_scale": 1, + "flow_shift": 3, + "num_inference_steps": 5, + "resolution": "1280x720" +} \ No newline at end of file diff --git a/defaults/lynx.json b/defaults/lynx.json new file mode 100644 index 0000000000000000000000000000000000000000..528f5ef68300306279f6098bace1ceb24958a79e --- /dev/null +++ b/defaults/lynx.json @@ -0,0 +1,18 @@ +{ + "model": { + "name": "Wan2.1 Lynx 14B", + "modules": [ + [ + "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_lynx_full_module_14B_bf16.safetensors", + "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_lynx_full_module_14B_quanto_bf16_int8.safetensors", + "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_lynx_full_module_14B_quanto_fp16_int8.safetensors" + ] + ], + "architecture": "lynx", + "description": "The Lynx ControlNet offers State of the Art Identity Preservation. You need to provide a Reference Image which is a close up of a person face to transfer this person in the Video.", + "URLs": "t2v", + "preload_URLs": [ + "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_lynx_full_arc_resampler.safetensors" + ] + } +} \ No newline at end of file diff --git a/defaults/moviigen.json b/defaults/moviigen.json new file mode 100644 index 0000000000000000000000000000000000000000..96a04f8842e4183c6860bf7937eec4c9adf490af --- /dev/null +++ b/defaults/moviigen.json @@ -0,0 +1,16 @@ +{ + "model": + { + "name": "MoviiGen 1080p 14B", + "architecture" : "t2v", + "description": "MoviiGen 1.1, a cutting-edge video generation model that excels in cinematic aesthetics and visual quality. 
Use it to generate videos in 720p or 1080p in the 21:9 ratio.", + "URLs": [ + "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_moviigen1.1_14B_mbf16.safetensors", + "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_moviigen1.1_14B_quanto_mbf16_int8.safetensors", + "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_moviigen1.1_14B_quanto_mfp16_int8.safetensors" + ], + "auto_quantize": true + }, + "resolution": "1280x720", + "video_length": 81 +} \ No newline at end of file diff --git a/defaults/multitalk.json b/defaults/multitalk.json new file mode 100644 index 0000000000000000000000000000000000000000..41699b58458233444abd25a94be25bc112c4489d --- /dev/null +++ b/defaults/multitalk.json @@ -0,0 +1,15 @@ +{ + "model": + { + "name": "Multitalk 480p 14B", + "architecture" : "multitalk", + "modules": [ + ["https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_multitalk_14B_mbf16.safetensors", + "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_multitalk_14B_quanto_mbf16_int8.safetensors", + "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_multitalk_14B_quanto_mfp16_int8.safetensors"] + ], + "description": "The Multitalk model corresponds to the original Wan image 2 video model combined with the Multitalk module. 
It lets up to two people have a conversation.", + "URLs": "i2v", + "teacache_coefficients" : [-3.02331670e+02, 2.23948934e+02, -5.25463970e+01, 5.87348440e+00, -2.01973289e-01] + } +} \ No newline at end of file diff --git a/defaults/multitalk_720p.json b/defaults/multitalk_720p.json new file mode 100644 index 0000000000000000000000000000000000000000..f18bebc01907c668c30b7daa58d69829f1eaf76f --- /dev/null +++ b/defaults/multitalk_720p.json @@ -0,0 +1,13 @@ +{ + "model": + { + "name": "Multitalk 720p 14B", + "architecture" : "multitalk", + "modules": ["multitalk"], + "description": "The Multitalk model corresponds to the original Wan image 2 video 720p model combined with the Multitalk module. It lets up to two people have a conversation.", + "URLs": "i2v_720p", + "teacache_coefficients" : [-114.36346466, 65.26524496, -18.82220707, 4.91518089, -0.23412683], + "auto_quantize": true + }, + "resolution": "1280x720" +} diff --git a/defaults/ovi.json b/defaults/ovi.json new file mode 100644 index 0000000000000000000000000000000000000000..6ea19047062847a431016b5fa530162e58efb1c5 --- /dev/null +++ b/defaults/ovi.json @@ -0,0 +1,18 @@ +{ + "model": { + "name": "Wan2.2 Ovi 10B", + "architecture": "ovi", + "description": "Ovi will generate an Audio soundtrack with the Video. It is specialized in speaking characters. 
Use the tags <S> and <E> to delimit the speaker words and <AUDCAP> and <ENDAUDCAP> to set the background noise.", + "URLs": [ + "https://huggingface.co/DeepBeepMeep/Wan2.2/resolve/main/wan2.2_ovi_video_10B_bf16.safetensors", + "https://huggingface.co/DeepBeepMeep/Wan2.2/resolve/main/wan2.2_ovi_video_10B_quanto_bf16_int8.safetensors" + ], + "URLs2": [ + "https://huggingface.co/DeepBeepMeep/Wan2.2/resolve/main/wan2.2_ovi_audio_10B_bf16.safetensors", + "https://huggingface.co/DeepBeepMeep/Wan2.2/resolve/main/wan2.2_ovi_audio_10B_quanto_bf16_int8.safetensors" + ] + }, + "num_inference_steps": 30, + "prompt" : "A singer in a glittering jacket grips the microphone, sweat shining on his brow, and shouts,Click 'Refresh Files' to load gallery.
Failed to load community plugins.
" + except json.JSONDecodeError: + gr.Warning("Failed to parse the community plugins list. The file may be malformed.") + return "Error reading community plugins list.
" + + if not community_plugins: + return "All available community plugins are already installed.
" + + items_html = "" + for plugin in community_plugins: + name = plugin.get('name') + author = plugin.get('author') + version = plugin.get('version', 'N/A') + description = plugin.get('description') + url = plugin.get('url') + + if not all([name, author, description, url]): + continue + + safe_url = url.replace("'", "\\'") + + items_html += f""" +No user-installed plugins found.
" + else: + user_plugins_map = {p['id']: p for p in all_user_plugins_info} + user_plugins = [] + for plugin_id in enabled_user_plugins: + if plugin_id in user_plugins_map: + user_plugins.append(user_plugins_map.pop(plugin_id)) + user_plugins.extend(sorted(user_plugins_map.values(), key=lambda p: p['name'])) + + user_items_html = "" + for plugin in user_plugins: + plugin_id = plugin['id'] + checked = "checked" if plugin_id in enabled_user_plugins else "" + user_items_html += f""" +| Qty | +Prompt | +Length | +Steps | +Start/Ref | +End | ++ | + |
|---|---|---|---|---|---|---|---|
| {item.get('repeats', "1")} | +{prompt_cell} | +{length} | +{num_steps} | +{start_img_md} | +{end_img_md} | +{edit_btn} | +{remove_btn} | +
| " + prompt + " | " + thumbnails + "