Update app.py
Browse files
app.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
# app.py
|
| 2 |
-
# Hugging Face Space (Gradio) for Lightricks/LTX-Video
|
| 3 |
# Requirements (add to requirements.txt in the Space):
|
| 4 |
# torch>=2.1.2, diffusers, transformers, accelerate, safetensors, einops, gradio, huggingface_hub, opencv-python
|
| 5 |
|
|
@@ -12,34 +12,45 @@ import gradio as gr
|
|
| 12 |
from diffusers import LTXConditionPipeline, LTXLatentUpsamplePipeline
|
| 13 |
from diffusers.utils import export_to_video, load_image, load_video
|
| 14 |
|
| 15 |
-
# Map of friendly model ids to HF repo ids
|
| 16 |
MODEL_MAP = {
|
| 17 |
-
"13B (distilled)": "Lightricks/LTX-Video-0.9.8-13B-distilled"
|
|
|
|
| 18 |
}
|
| 19 |
|
| 20 |
-
HF_TOKEN = os.environ.get("HF_TOKEN") #
|
| 21 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
| 22 |
|
| 23 |
@lru_cache(maxsize=4)
|
| 24 |
def load_pipes(repo_id: str, torch_dtype_str: str = "bfloat16"):
|
| 25 |
-
# Load both the main pipe and the latent upsampler when available
|
| 26 |
dtype = getattr(torch, torch_dtype_str, torch.bfloat16)
|
| 27 |
-
pipe = LTXConditionPipeline.from_pretrained(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
up_id = repo_id.replace("LTX-Video-", "ltxv-spatial-upscaler-")
|
| 29 |
try:
|
| 30 |
-
up = LTXLatentUpsamplePipeline.from_pretrained(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
except Exception:
|
| 32 |
up = None
|
| 33 |
-
if DEVICE == "cuda":
|
| 34 |
-
pipe.to("cuda")
|
| 35 |
-
if up is not None:
|
| 36 |
-
up.to("cuda")
|
| 37 |
return pipe, up
|
| 38 |
|
| 39 |
|
| 40 |
def sanitize_size(h, w):
|
| 41 |
h, w = int(h), int(w)
|
| 42 |
-
# model expects multiples constrained by vae; we'll let the pipeline handle padding but avoid ridiculous sizes
|
| 43 |
h = max(64, min(1080, h))
|
| 44 |
w = max(64, min(2048, w))
|
| 45 |
return h, w
|
|
@@ -52,9 +63,6 @@ def generate(prompt, conditioning_file, height, width, num_frames, steps, seed,
|
|
| 52 |
repo_id = MODEL_MAP.get(model_choice, list(MODEL_MAP.values())[0])
|
| 53 |
torch_dtype = "bfloat16" if DEVICE == "cuda" else "float32"
|
| 54 |
|
| 55 |
-
with gr.Row():
|
| 56 |
-
pass
|
| 57 |
-
|
| 58 |
pipe, up = load_pipes(repo_id, torch_dtype_str=torch_dtype)
|
| 59 |
|
| 60 |
height, width = sanitize_size(height, width)
|
|
@@ -63,24 +71,20 @@ def generate(prompt, conditioning_file, height, width, num_frames, steps, seed,
|
|
| 63 |
|
| 64 |
generator = torch.Generator(device=DEVICE).manual_seed(int(seed) if seed else random.randint(0, 2**31 - 1))
|
| 65 |
|
| 66 |
-
# prepare conditioning
|
| 67 |
conditions = []
|
| 68 |
if conditioning_file is not None:
|
| 69 |
tmp = tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(conditioning_file.name)[1])
|
| 70 |
tmp.write(conditioning_file.read())
|
| 71 |
tmp.flush()
|
| 72 |
tmp.close()
|
| 73 |
-
# try to load as image, otherwise as video
|
| 74 |
try:
|
| 75 |
img = load_image(tmp.name)
|
| 76 |
video_cond = export_to_video([img])
|
| 77 |
video = load_video(video_cond)
|
| 78 |
except Exception:
|
| 79 |
video = load_video(tmp.name)
|
| 80 |
-
# use first frame as condition example
|
| 81 |
conditions.append((video, 0))
|
| 82 |
|
| 83 |
-
# Build LTXVideoCondition objects
|
| 84 |
from diffusers.pipelines.ltx.pipeline_ltx_condition import LTXVideoCondition
|
| 85 |
ltx_conditions = []
|
| 86 |
for vid, frame_idx in conditions:
|
|
@@ -88,7 +92,6 @@ def generate(prompt, conditioning_file, height, width, num_frames, steps, seed,
|
|
| 88 |
|
| 89 |
negative_prompt = "worst quality, inconsistent motion, blurry, jittery, distorted"
|
| 90 |
|
| 91 |
-
# Part 1: generate at downscaled resolution for speed (recommended in model card)
|
| 92 |
downscale = 2 / 3
|
| 93 |
down_h, down_w = int(height * downscale), int(width * downscale)
|
| 94 |
latents = pipe(
|
|
@@ -103,13 +106,11 @@ def generate(prompt, conditioning_file, height, width, num_frames, steps, seed,
|
|
| 103 |
output_type="latent",
|
| 104 |
).frames
|
| 105 |
|
| 106 |
-
# Part 2: upsample latents (if available)
|
| 107 |
if up is not None:
|
| 108 |
upscaled_latents = up(latents=latents, output_type="latent").frames
|
| 109 |
else:
|
| 110 |
upscaled_latents = latents
|
| 111 |
|
| 112 |
-
# Part 3: denoise / decode to PIL frames
|
| 113 |
denoise_strength = 0.4
|
| 114 |
final_frames = pipe(
|
| 115 |
conditions=ltx_conditions or None,
|
|
@@ -127,7 +128,6 @@ def generate(prompt, conditioning_file, height, width, num_frames, steps, seed,
|
|
| 127 |
output_type="pil",
|
| 128 |
).frames[0]
|
| 129 |
|
| 130 |
-
# Ensure frames are resized to expected resolution
|
| 131 |
final_frames = [f.resize((width, height)) for f in final_frames]
|
| 132 |
|
| 133 |
out_path = os.path.join(tempfile.gettempdir(), f"ltx_out_{random.randint(0,999999)}.mp4")
|
|
@@ -137,7 +137,7 @@ def generate(prompt, conditioning_file, height, width, num_frames, steps, seed,
|
|
| 137 |
|
| 138 |
|
| 139 |
with gr.Blocks(title="LTX-Video — Image/Video → Video") as demo:
|
| 140 |
-
gr.Markdown("# LTX-Video (Lightricks) —
|
| 141 |
|
| 142 |
with gr.Row():
|
| 143 |
with gr.Column(scale=3):
|
|
@@ -147,8 +147,8 @@ with gr.Blocks(title="LTX-Video — Image/Video → Video") as demo:
|
|
| 147 |
with gr.Column(scale=1):
|
| 148 |
height = gr.Number(label="Height", value=480)
|
| 149 |
width = gr.Number(label="Width", value=832)
|
| 150 |
-
num_frames = gr.Number(label="Num frames", value=
|
| 151 |
-
steps = gr.Number(label="Inference steps", value=
|
| 152 |
seed = gr.Number(label="Seed (optional)", value=0)
|
| 153 |
generate_btn = gr.Button("Generate")
|
| 154 |
|
|
@@ -158,4 +158,5 @@ with gr.Blocks(title="LTX-Video — Image/Video → Video") as demo:
|
|
| 158 |
generate_btn.click(fn=generate, inputs=[prompt, conditioning, height, width, num_frames, steps, seed, model_choice], outputs=[out_video, status])
|
| 159 |
|
| 160 |
if __name__ == "__main__":
|
|
|
|
| 161 |
demo.launch()
|
|
|
|
| 1 |
# app.py
|
| 2 |
+
# Hugging Face Space (Gradio) for Lightricks/LTX-Video — improved memory management
|
| 3 |
# Requirements (add to requirements.txt in the Space):
|
| 4 |
# torch>=2.1.2, diffusers, transformers, accelerate, safetensors, einops, gradio, huggingface_hub, opencv-python
|
| 5 |
|
|
|
|
| 12 |
from diffusers import LTXConditionPipeline, LTXLatentUpsamplePipeline
|
| 13 |
from diffusers.utils import export_to_video, load_image, load_video
|
| 14 |
|
| 15 |
+
# Map of friendly model ids to HF repo ids
|
| 16 |
MODEL_MAP = {
|
| 17 |
+
"13B (distilled)": "Lightricks/LTX-Video-0.9.8-13B-distilled",
|
| 18 |
+
"Latest": "Lightricks/LTX-Video",
|
| 19 |
}
|
| 20 |
|
| 21 |
+
HF_TOKEN = os.environ.get("HF_TOKEN") # Hugging Face token for private models
|
| 22 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
| 23 |
|
| 24 |
@lru_cache(maxsize=4)
def load_pipes(repo_id: str, torch_dtype_str: str = "bfloat16"):
    """Load (and memoize) the main LTX-Video pipeline plus its latent upsampler.

    Parameters
    ----------
    repo_id : str
        Hugging Face repo id of the main LTX-Video model.
    torch_dtype_str : str
        Name of a ``torch`` dtype attribute; unknown names fall back to
        ``torch.bfloat16``.

    Returns
    -------
    (pipe, up) : tuple
        ``up`` is ``None`` when no matching spatial-upscaler repo exists or
        it fails to load — callers must handle that case.
    """
    dtype = getattr(torch, torch_dtype_str, torch.bfloat16)

    # from_pretrained needs the offload folder to exist before loading;
    # the __main__ guard only creates it when running as a script, so
    # create it here as well for the imported-module case.
    os.makedirs("./offload", exist_ok=True)

    pipe = LTXConditionPipeline.from_pretrained(
        repo_id,
        torch_dtype=dtype,
        use_safetensors=True,
        token=HF_TOKEN,
        # NOTE(review): diffusers pipelines historically accept only
        # device_map="balanced"; confirm "auto" works on the pinned version.
        device_map="auto",
        offload_folder="./offload",
    )

    # The spatial upsampler lives in a sibling repo; derive its id from the
    # main repo name. For repos that don't follow the "LTX-Video-*" naming
    # scheme this is a no-op and the load below is expected to fail.
    up_id = repo_id.replace("LTX-Video-", "ltxv-spatial-upscaler-")
    try:
        up = LTXLatentUpsamplePipeline.from_pretrained(
            up_id,
            vae=pipe.vae,  # share the VAE with the main pipe to save memory
            torch_dtype=dtype,
            use_safetensors=True,
            token=HF_TOKEN,
            device_map="auto",
            offload_folder="./offload",
        )
    except Exception as exc:
        # Best-effort fallback: run without latent upsampling, but don't
        # silently hide why it was unavailable.
        print(f"Latent upsampler unavailable ({up_id}): {exc}")
        up = None
    return pipe, up
|
| 50 |
|
| 51 |
|
| 52 |
def sanitize_size(h, w):
    """Clamp the requested output dimensions to a sane range.

    Height is limited to [64, 1080] and width to [64, 2048]; any
    VAE-multiple padding is left to the pipeline itself.
    """
    height = min(max(int(h), 64), 1080)
    width = min(max(int(w), 64), 2048)
    return height, width
|
|
|
|
| 63 |
repo_id = MODEL_MAP.get(model_choice, list(MODEL_MAP.values())[0])
|
| 64 |
torch_dtype = "bfloat16" if DEVICE == "cuda" else "float32"
|
| 65 |
|
|
|
|
|
|
|
|
|
|
| 66 |
pipe, up = load_pipes(repo_id, torch_dtype_str=torch_dtype)
|
| 67 |
|
| 68 |
height, width = sanitize_size(height, width)
|
|
|
|
| 71 |
|
| 72 |
generator = torch.Generator(device=DEVICE).manual_seed(int(seed) if seed else random.randint(0, 2**31 - 1))
|
| 73 |
|
|
|
|
| 74 |
conditions = []
|
| 75 |
if conditioning_file is not None:
|
| 76 |
tmp = tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(conditioning_file.name)[1])
|
| 77 |
tmp.write(conditioning_file.read())
|
| 78 |
tmp.flush()
|
| 79 |
tmp.close()
|
|
|
|
| 80 |
try:
|
| 81 |
img = load_image(tmp.name)
|
| 82 |
video_cond = export_to_video([img])
|
| 83 |
video = load_video(video_cond)
|
| 84 |
except Exception:
|
| 85 |
video = load_video(tmp.name)
|
|
|
|
| 86 |
conditions.append((video, 0))
|
| 87 |
|
|
|
|
| 88 |
from diffusers.pipelines.ltx.pipeline_ltx_condition import LTXVideoCondition
|
| 89 |
ltx_conditions = []
|
| 90 |
for vid, frame_idx in conditions:
|
|
|
|
| 92 |
|
| 93 |
negative_prompt = "worst quality, inconsistent motion, blurry, jittery, distorted"
|
| 94 |
|
|
|
|
| 95 |
downscale = 2 / 3
|
| 96 |
down_h, down_w = int(height * downscale), int(width * downscale)
|
| 97 |
latents = pipe(
|
|
|
|
| 106 |
output_type="latent",
|
| 107 |
).frames
|
| 108 |
|
|
|
|
| 109 |
if up is not None:
|
| 110 |
upscaled_latents = up(latents=latents, output_type="latent").frames
|
| 111 |
else:
|
| 112 |
upscaled_latents = latents
|
| 113 |
|
|
|
|
| 114 |
denoise_strength = 0.4
|
| 115 |
final_frames = pipe(
|
| 116 |
conditions=ltx_conditions or None,
|
|
|
|
| 128 |
output_type="pil",
|
| 129 |
).frames[0]
|
| 130 |
|
|
|
|
| 131 |
final_frames = [f.resize((width, height)) for f in final_frames]
|
| 132 |
|
| 133 |
out_path = os.path.join(tempfile.gettempdir(), f"ltx_out_{random.randint(0,999999)}.mp4")
|
|
|
|
| 137 |
|
| 138 |
|
| 139 |
with gr.Blocks(title="LTX-Video — Image/Video → Video") as demo:
|
| 140 |
+
gr.Markdown("# LTX-Video (Lightricks) — improved memory Space\nUpload an image or a short video to condition on, write an English prompt and press Generate. GPU highly recommended.")
|
| 141 |
|
| 142 |
with gr.Row():
|
| 143 |
with gr.Column(scale=3):
|
|
|
|
| 147 |
with gr.Column(scale=1):
|
| 148 |
height = gr.Number(label="Height", value=480)
|
| 149 |
width = gr.Number(label="Width", value=832)
|
| 150 |
+
num_frames = gr.Number(label="Num frames", value=16)
|
| 151 |
+
steps = gr.Number(label="Inference steps", value=20)
|
| 152 |
seed = gr.Number(label="Seed (optional)", value=0)
|
| 153 |
generate_btn = gr.Button("Generate")
|
| 154 |
|
|
|
|
| 158 |
generate_btn.click(fn=generate, inputs=[prompt, conditioning, height, width, num_frames, steps, seed, model_choice], outputs=[out_video, status])
|
| 159 |
|
| 160 |
if __name__ == "__main__":
    # Create the folder used by the pipelines for weight offloading.
    os.makedirs("./offload", exist_ok=True)
    demo.launch()
|