Update app.py
Browse files
app.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
# app.py
|
| 2 |
-
# Hugging Face Space (Gradio) for Lightricks/LTX-Video
|
| 3 |
# Requirements (add to requirements.txt in the Space):
|
| 4 |
# torch>=2.1.2, diffusers, transformers, accelerate, safetensors, einops, gradio, huggingface_hub, opencv-python
|
| 5 |
|
|
@@ -12,34 +12,45 @@ import gradio as gr
|
|
| 12 |
from diffusers import LTXConditionPipeline, LTXLatentUpsamplePipeline
|
| 13 |
from diffusers.utils import export_to_video, load_image, load_video
|
| 14 |
|
| 15 |
-
# Map of friendly model ids to HF repo ids
|
| 16 |
MODEL_MAP = {
|
| 17 |
-
"13B (distilled)": "Lightricks/LTX-Video-0.9.8-13B-distilled"
|
|
|
|
| 18 |
}
|
| 19 |
|
| 20 |
-
HF_TOKEN = os.environ.get("HF_TOKEN") #
|
| 21 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
| 22 |
|
| 23 |
@lru_cache(maxsize=4)
|
| 24 |
def load_pipes(repo_id: str, torch_dtype_str: str = "bfloat16"):
|
| 25 |
-
# Load both the main pipe and the latent upsampler when available
|
| 26 |
dtype = getattr(torch, torch_dtype_str, torch.bfloat16)
|
| 27 |
-
pipe = LTXConditionPipeline.from_pretrained(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
up_id = repo_id.replace("LTX-Video-", "ltxv-spatial-upscaler-")
|
| 29 |
try:
|
| 30 |
-
up = LTXLatentUpsamplePipeline.from_pretrained(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
except Exception:
|
| 32 |
up = None
|
| 33 |
-
if DEVICE == "cuda":
|
| 34 |
-
pipe.to("cuda")
|
| 35 |
-
if up is not None:
|
| 36 |
-
up.to("cuda")
|
| 37 |
return pipe, up
|
| 38 |
|
| 39 |
|
| 40 |
def sanitize_size(h, w):
|
| 41 |
h, w = int(h), int(w)
|
| 42 |
-
# model expects multiples constrained by vae; we'll let the pipeline handle padding but avoid ridiculous sizes
|
| 43 |
h = max(64, min(1080, h))
|
| 44 |
w = max(64, min(2048, w))
|
| 45 |
return h, w
|
|
@@ -52,9 +63,6 @@ def generate(prompt, conditioning_file, height, width, num_frames, steps, seed,
|
|
| 52 |
repo_id = MODEL_MAP.get(model_choice, list(MODEL_MAP.values())[0])
|
| 53 |
torch_dtype = "bfloat16" if DEVICE == "cuda" else "float32"
|
| 54 |
|
| 55 |
-
with gr.Row():
|
| 56 |
-
pass
|
| 57 |
-
|
| 58 |
pipe, up = load_pipes(repo_id, torch_dtype_str=torch_dtype)
|
| 59 |
|
| 60 |
height, width = sanitize_size(height, width)
|
|
@@ -63,24 +71,20 @@ def generate(prompt, conditioning_file, height, width, num_frames, steps, seed,
|
|
| 63 |
|
| 64 |
generator = torch.Generator(device=DEVICE).manual_seed(int(seed) if seed else random.randint(0, 2**31 - 1))
|
| 65 |
|
| 66 |
-
# prepare conditioning
|
| 67 |
conditions = []
|
| 68 |
if conditioning_file is not None:
|
| 69 |
tmp = tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(conditioning_file.name)[1])
|
| 70 |
tmp.write(conditioning_file.read())
|
| 71 |
tmp.flush()
|
| 72 |
tmp.close()
|
| 73 |
-
# try to load as image, otherwise as video
|
| 74 |
try:
|
| 75 |
img = load_image(tmp.name)
|
| 76 |
video_cond = export_to_video([img])
|
| 77 |
video = load_video(video_cond)
|
| 78 |
except Exception:
|
| 79 |
video = load_video(tmp.name)
|
| 80 |
-
# use first frame as condition example
|
| 81 |
conditions.append((video, 0))
|
| 82 |
|
| 83 |
-
# Build LTXVideoCondition objects
|
| 84 |
from diffusers.pipelines.ltx.pipeline_ltx_condition import LTXVideoCondition
|
| 85 |
ltx_conditions = []
|
| 86 |
for vid, frame_idx in conditions:
|
|
@@ -88,7 +92,6 @@ def generate(prompt, conditioning_file, height, width, num_frames, steps, seed,
|
|
| 88 |
|
| 89 |
negative_prompt = "worst quality, inconsistent motion, blurry, jittery, distorted"
|
| 90 |
|
| 91 |
-
# Part 1: generate at downscaled resolution for speed (recommended in model card)
|
| 92 |
downscale = 2 / 3
|
| 93 |
down_h, down_w = int(height * downscale), int(width * downscale)
|
| 94 |
latents = pipe(
|
|
@@ -103,13 +106,11 @@ def generate(prompt, conditioning_file, height, width, num_frames, steps, seed,
|
|
| 103 |
output_type="latent",
|
| 104 |
).frames
|
| 105 |
|
| 106 |
-
# Part 2: upsample latents (if available)
|
| 107 |
if up is not None:
|
| 108 |
upscaled_latents = up(latents=latents, output_type="latent").frames
|
| 109 |
else:
|
| 110 |
upscaled_latents = latents
|
| 111 |
|
| 112 |
-
# Part 3: denoise / decode to PIL frames
|
| 113 |
denoise_strength = 0.4
|
| 114 |
final_frames = pipe(
|
| 115 |
conditions=ltx_conditions or None,
|
|
@@ -127,7 +128,6 @@ def generate(prompt, conditioning_file, height, width, num_frames, steps, seed,
|
|
| 127 |
output_type="pil",
|
| 128 |
).frames[0]
|
| 129 |
|
| 130 |
-
# Ensure frames are resized to expected resolution
|
| 131 |
final_frames = [f.resize((width, height)) for f in final_frames]
|
| 132 |
|
| 133 |
out_path = os.path.join(tempfile.gettempdir(), f"ltx_out_{random.randint(0,999999)}.mp4")
|
|
@@ -137,7 +137,7 @@ def generate(prompt, conditioning_file, height, width, num_frames, steps, seed,
|
|
| 137 |
|
| 138 |
|
| 139 |
with gr.Blocks(title="LTX-Video — Image/Video → Video") as demo:
|
| 140 |
-
gr.Markdown("# LTX-Video (Lightricks) —
|
| 141 |
|
| 142 |
with gr.Row():
|
| 143 |
with gr.Column(scale=3):
|
|
@@ -147,8 +147,8 @@ with gr.Blocks(title="LTX-Video — Image/Video → Video") as demo:
|
|
| 147 |
with gr.Column(scale=1):
|
| 148 |
height = gr.Number(label="Height", value=480)
|
| 149 |
width = gr.Number(label="Width", value=832)
|
| 150 |
-
num_frames = gr.Number(label="Num frames", value=
|
| 151 |
-
steps = gr.Number(label="Inference steps", value=
|
| 152 |
seed = gr.Number(label="Seed (optional)", value=0)
|
| 153 |
generate_btn = gr.Button("Generate")
|
| 154 |
|
|
@@ -158,4 +158,5 @@ with gr.Blocks(title="LTX-Video — Image/Video → Video") as demo:
|
|
| 158 |
generate_btn.click(fn=generate, inputs=[prompt, conditioning, height, width, num_frames, steps, seed, model_choice], outputs=[out_video, status])
|
| 159 |
|
| 160 |
if __name__ == "__main__":
|
|
|
|
| 161 |
demo.launch()
|
|
|
|
| 1 |
# app.py
|
| 2 |
+
# Hugging Face Space (Gradio) for Lightricks/LTX-Video — improved memory management
|
| 3 |
# Requirements (add to requirements.txt in the Space):
|
| 4 |
# torch>=2.1.2, diffusers, transformers, accelerate, safetensors, einops, gradio, huggingface_hub, opencv-python
|
| 5 |
|
|
|
|
| 12 |
from diffusers import LTXConditionPipeline, LTXLatentUpsamplePipeline
|
| 13 |
from diffusers.utils import export_to_video, load_image, load_video
|
| 14 |
|
| 15 |
+
# Map of friendly model ids to HF repo ids
|
| 16 |
MODEL_MAP = {
|
| 17 |
+
"13B (distilled)": "Lightricks/LTX-Video-0.9.8-13B-distilled",
|
| 18 |
+
"Latest": "Lightricks/LTX-Video",
|
| 19 |
}
|
| 20 |
|
| 21 |
+
HF_TOKEN = os.environ.get("HF_TOKEN") # Hugging Face token for private models
|
| 22 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
| 23 |
|
| 24 |
@lru_cache(maxsize=4)
def load_pipes(repo_id: str, torch_dtype_str: str = "bfloat16"):
    """Load (and memoize) the main LTX-Video pipeline plus its latent upsampler.

    Parameters
    ----------
    repo_id : str
        Hugging Face repo id of the main LTX-Video model.
    torch_dtype_str : str
        Name of a ``torch`` dtype attribute; unknown names fall back to
        ``torch.bfloat16``.

    Returns
    -------
    (pipe, up) : tuple
        ``up`` is ``None`` when no matching spatial-upscaler repo exists or
        it fails to load — callers must handle that case.
    """
    dtype = getattr(torch, torch_dtype_str, torch.bfloat16)

    # from_pretrained needs the offload folder to exist before loading;
    # the __main__ guard only creates it when running as a script, so
    # create it here as well for the imported-module case.
    os.makedirs("./offload", exist_ok=True)

    pipe = LTXConditionPipeline.from_pretrained(
        repo_id,
        torch_dtype=dtype,
        use_safetensors=True,
        token=HF_TOKEN,
        # NOTE(review): diffusers pipelines historically accept only
        # device_map="balanced"; confirm "auto" works on the pinned version.
        device_map="auto",
        offload_folder="./offload",
    )

    # The spatial upsampler lives in a sibling repo; derive its id from the
    # main repo name. For repos that don't follow the "LTX-Video-*" naming
    # scheme this is a no-op and the load below is expected to fail.
    up_id = repo_id.replace("LTX-Video-", "ltxv-spatial-upscaler-")
    try:
        up = LTXLatentUpsamplePipeline.from_pretrained(
            up_id,
            vae=pipe.vae,  # share the VAE with the main pipe to save memory
            torch_dtype=dtype,
            use_safetensors=True,
            token=HF_TOKEN,
            device_map="auto",
            offload_folder="./offload",
        )
    except Exception as exc:
        # Best-effort fallback: run without latent upsampling, but don't
        # silently hide why it was unavailable.
        print(f"Latent upsampler unavailable ({up_id}): {exc}")
        up = None
    return pipe, up
|
| 50 |
|
| 51 |
|
| 52 |
def sanitize_size(h, w):
    """Clamp the requested output dimensions to a sane range.

    Height is limited to [64, 1080] and width to [64, 2048]; any
    VAE-multiple padding is left to the pipeline itself.
    """
    height = min(max(int(h), 64), 1080)
    width = min(max(int(w), 64), 2048)
    return height, width
|
|
|
|
| 63 |
repo_id = MODEL_MAP.get(model_choice, list(MODEL_MAP.values())[0])
|
| 64 |
torch_dtype = "bfloat16" if DEVICE == "cuda" else "float32"
|
| 65 |
|
|
|
|
|
|
|
|
|
|
| 66 |
pipe, up = load_pipes(repo_id, torch_dtype_str=torch_dtype)
|
| 67 |
|
| 68 |
height, width = sanitize_size(height, width)
|
|
|
|
| 71 |
|
| 72 |
generator = torch.Generator(device=DEVICE).manual_seed(int(seed) if seed else random.randint(0, 2**31 - 1))
|
| 73 |
|
|
|
|
| 74 |
conditions = []
|
| 75 |
if conditioning_file is not None:
|
| 76 |
tmp = tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(conditioning_file.name)[1])
|
| 77 |
tmp.write(conditioning_file.read())
|
| 78 |
tmp.flush()
|
| 79 |
tmp.close()
|
|
|
|
| 80 |
try:
|
| 81 |
img = load_image(tmp.name)
|
| 82 |
video_cond = export_to_video([img])
|
| 83 |
video = load_video(video_cond)
|
| 84 |
except Exception:
|
| 85 |
video = load_video(tmp.name)
|
|
|
|
| 86 |
conditions.append((video, 0))
|
| 87 |
|
|
|
|
| 88 |
from diffusers.pipelines.ltx.pipeline_ltx_condition import LTXVideoCondition
|
| 89 |
ltx_conditions = []
|
| 90 |
for vid, frame_idx in conditions:
|
|
|
|
| 92 |
|
| 93 |
negative_prompt = "worst quality, inconsistent motion, blurry, jittery, distorted"
|
| 94 |
|
|
|
|
| 95 |
downscale = 2 / 3
|
| 96 |
down_h, down_w = int(height * downscale), int(width * downscale)
|
| 97 |
latents = pipe(
|
|
|
|
| 106 |
output_type="latent",
|
| 107 |
).frames
|
| 108 |
|
|
|
|
| 109 |
if up is not None:
|
| 110 |
upscaled_latents = up(latents=latents, output_type="latent").frames
|
| 111 |
else:
|
| 112 |
upscaled_latents = latents
|
| 113 |
|
|
|
|
| 114 |
denoise_strength = 0.4
|
| 115 |
final_frames = pipe(
|
| 116 |
conditions=ltx_conditions or None,
|
|
|
|
| 128 |
output_type="pil",
|
| 129 |
).frames[0]
|
| 130 |
|
|
|
|
| 131 |
final_frames = [f.resize((width, height)) for f in final_frames]
|
| 132 |
|
| 133 |
out_path = os.path.join(tempfile.gettempdir(), f"ltx_out_{random.randint(0,999999)}.mp4")
|
|
|
|
| 137 |
|
| 138 |
|
| 139 |
with gr.Blocks(title="LTX-Video — Image/Video → Video") as demo:
|
| 140 |
+
gr.Markdown("# LTX-Video (Lightricks) — improved memory Space\nUpload an image or a short video to condition on, write an English prompt and press Generate. GPU highly recommended.")
|
| 141 |
|
| 142 |
with gr.Row():
|
| 143 |
with gr.Column(scale=3):
|
|
|
|
| 147 |
with gr.Column(scale=1):
|
| 148 |
height = gr.Number(label="Height", value=480)
|
| 149 |
width = gr.Number(label="Width", value=832)
|
| 150 |
+
num_frames = gr.Number(label="Num frames", value=16)
|
| 151 |
+
steps = gr.Number(label="Inference steps", value=20)
|
| 152 |
seed = gr.Number(label="Seed (optional)", value=0)
|
| 153 |
generate_btn = gr.Button("Generate")
|
| 154 |
|
|
|
|
| 158 |
generate_btn.click(fn=generate, inputs=[prompt, conditioning, height, width, num_frames, steps, seed, model_choice], outputs=[out_video, status])
|
| 159 |
|
| 160 |
if __name__ == "__main__":
    # Create the folder used by the pipelines for weight offloading.
    os.makedirs("./offload", exist_ok=True)
    demo.launch()
|