FENST4R committed on
Commit
d7ee779
·
verified ·
1 Parent(s): 002507c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -26
app.py CHANGED
@@ -1,5 +1,5 @@
1
  # app.py
2
- # Hugging Face Space (Gradio) for Lightricks/LTX-Video
3
  # Requirements (add to requirements.txt in the Space):
4
  # torch>=2.1.2, diffusers, transformers, accelerate, safetensors, einops, gradio, huggingface_hub, opencv-python
5
 
@@ -12,34 +12,45 @@ import gradio as gr
12
  from diffusers import LTXConditionPipeline, LTXLatentUpsamplePipeline
13
  from diffusers.utils import export_to_video, load_image, load_video
14
 
15
- # Map of friendly model ids to HF repo ids (adjust if you want different variants)
16
  MODEL_MAP = {
17
- "13B (distilled)": "Lightricks/LTX-Video-0.9.8-13B-distilled"
 
18
  }
19
 
20
- HF_TOKEN = os.environ.get("HF_TOKEN") # optional; better to set in Space secrets
21
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
22
 
23
  @lru_cache(maxsize=4)
24
  def load_pipes(repo_id: str, torch_dtype_str: str = "bfloat16"):
25
- # Load both the main pipe and the latent upsampler when available
26
  dtype = getattr(torch, torch_dtype_str, torch.bfloat16)
27
- pipe = LTXConditionPipeline.from_pretrained(repo_id, torch_dtype=dtype, use_safetensors=True)
 
 
 
 
 
 
 
 
28
  up_id = repo_id.replace("LTX-Video-", "ltxv-spatial-upscaler-")
29
  try:
30
- up = LTXLatentUpsamplePipeline.from_pretrained(up_id, vae=pipe.vae, torch_dtype=dtype, use_safetensors=True)
 
 
 
 
 
 
 
 
31
  except Exception:
32
  up = None
33
- if DEVICE == "cuda":
34
- pipe.to("cuda")
35
- if up is not None:
36
- up.to("cuda")
37
  return pipe, up
38
 
39
 
40
  def sanitize_size(h, w):
41
  h, w = int(h), int(w)
42
- # model expects multiples constrained by vae; we'll let the pipeline handle padding but avoid ridiculous sizes
43
  h = max(64, min(1080, h))
44
  w = max(64, min(2048, w))
45
  return h, w
@@ -52,9 +63,6 @@ def generate(prompt, conditioning_file, height, width, num_frames, steps, seed,
52
  repo_id = MODEL_MAP.get(model_choice, list(MODEL_MAP.values())[0])
53
  torch_dtype = "bfloat16" if DEVICE == "cuda" else "float32"
54
 
55
- with gr.Row():
56
- pass
57
-
58
  pipe, up = load_pipes(repo_id, torch_dtype_str=torch_dtype)
59
 
60
  height, width = sanitize_size(height, width)
@@ -63,24 +71,20 @@ def generate(prompt, conditioning_file, height, width, num_frames, steps, seed,
63
 
64
  generator = torch.Generator(device=DEVICE).manual_seed(int(seed) if seed else random.randint(0, 2**31 - 1))
65
 
66
- # prepare conditioning
67
  conditions = []
68
  if conditioning_file is not None:
69
  tmp = tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(conditioning_file.name)[1])
70
  tmp.write(conditioning_file.read())
71
  tmp.flush()
72
  tmp.close()
73
- # try to load as image, otherwise as video
74
  try:
75
  img = load_image(tmp.name)
76
  video_cond = export_to_video([img])
77
  video = load_video(video_cond)
78
  except Exception:
79
  video = load_video(tmp.name)
80
- # use first frame as condition example
81
  conditions.append((video, 0))
82
 
83
- # Build LTXVideoCondition objects
84
  from diffusers.pipelines.ltx.pipeline_ltx_condition import LTXVideoCondition
85
  ltx_conditions = []
86
  for vid, frame_idx in conditions:
@@ -88,7 +92,6 @@ def generate(prompt, conditioning_file, height, width, num_frames, steps, seed,
88
 
89
  negative_prompt = "worst quality, inconsistent motion, blurry, jittery, distorted"
90
 
91
- # Part 1: generate at downscaled resolution for speed (recommended in model card)
92
  downscale = 2 / 3
93
  down_h, down_w = int(height * downscale), int(width * downscale)
94
  latents = pipe(
@@ -103,13 +106,11 @@ def generate(prompt, conditioning_file, height, width, num_frames, steps, seed,
103
  output_type="latent",
104
  ).frames
105
 
106
- # Part 2: upsample latents (if available)
107
  if up is not None:
108
  upscaled_latents = up(latents=latents, output_type="latent").frames
109
  else:
110
  upscaled_latents = latents
111
 
112
- # Part 3: denoise / decode to PIL frames
113
  denoise_strength = 0.4
114
  final_frames = pipe(
115
  conditions=ltx_conditions or None,
@@ -127,7 +128,6 @@ def generate(prompt, conditioning_file, height, width, num_frames, steps, seed,
127
  output_type="pil",
128
  ).frames[0]
129
 
130
- # Ensure frames are resized to expected resolution
131
  final_frames = [f.resize((width, height)) for f in final_frames]
132
 
133
  out_path = os.path.join(tempfile.gettempdir(), f"ltx_out_{random.randint(0,999999)}.mp4")
@@ -137,7 +137,7 @@ def generate(prompt, conditioning_file, height, width, num_frames, steps, seed,
137
 
138
 
139
  with gr.Blocks(title="LTX-Video — Image/Video → Video") as demo:
140
- gr.Markdown("# LTX-Video (Lightricks) — simple Space\nUpload an image or a short video to condition on, write an English prompt and press Generate. GPU highly recommended.")
141
 
142
  with gr.Row():
143
  with gr.Column(scale=3):
@@ -147,8 +147,8 @@ with gr.Blocks(title="LTX-Video — Image/Video → Video") as demo:
147
  with gr.Column(scale=1):
148
  height = gr.Number(label="Height", value=480)
149
  width = gr.Number(label="Width", value=832)
150
- num_frames = gr.Number(label="Num frames", value=96)
151
- steps = gr.Number(label="Inference steps", value=30)
152
  seed = gr.Number(label="Seed (optional)", value=0)
153
  generate_btn = gr.Button("Generate")
154
 
@@ -158,4 +158,5 @@ with gr.Blocks(title="LTX-Video — Image/Video → Video") as demo:
158
  generate_btn.click(fn=generate, inputs=[prompt, conditioning, height, width, num_frames, steps, seed, model_choice], outputs=[out_video, status])
159
 
160
  if __name__ == "__main__":
 
161
  demo.launch()
 
1
  # app.py
2
+ # Hugging Face Space (Gradio) for Lightricks/LTX-Video — improved memory management
3
  # Requirements (add to requirements.txt in the Space):
4
  # torch>=2.1.2, diffusers, transformers, accelerate, safetensors, einops, gradio, huggingface_hub, opencv-python
5
 
 
12
  from diffusers import LTXConditionPipeline, LTXLatentUpsamplePipeline
13
  from diffusers.utils import export_to_video, load_image, load_video
14
 
15
+ # Map of friendly model ids to HF repo ids
16
  MODEL_MAP = {
17
+ "13B (distilled)": "Lightricks/LTX-Video-0.9.8-13B-distilled",
18
+ "Latest": "Lightricks/LTX-Video",
19
  }
20
 
21
+ HF_TOKEN = os.environ.get("HF_TOKEN") # Hugging Face token for private models
22
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
23
 
24
  @lru_cache(maxsize=4)
25
  def load_pipes(repo_id: str, torch_dtype_str: str = "bfloat16"):
 
26
  dtype = getattr(torch, torch_dtype_str, torch.bfloat16)
27
+ pipe = LTXConditionPipeline.from_pretrained(
28
+ repo_id,
29
+ torch_dtype=dtype,
30
+ use_safetensors=True,
31
+ token=HF_TOKEN,
32
+ device_map="auto",
33
+ offload_folder="./offload",
34
+ )
35
+
36
  up_id = repo_id.replace("LTX-Video-", "ltxv-spatial-upscaler-")
37
  try:
38
+ up = LTXLatentUpsamplePipeline.from_pretrained(
39
+ up_id,
40
+ vae=pipe.vae,
41
+ torch_dtype=dtype,
42
+ use_safetensors=True,
43
+ token=HF_TOKEN,
44
+ device_map="auto",
45
+ offload_folder="./offload",
46
+ )
47
  except Exception:
48
  up = None
 
 
 
 
49
  return pipe, up
50
 
51
 
52
  def sanitize_size(h, w):
53
  h, w = int(h), int(w)
 
54
  h = max(64, min(1080, h))
55
  w = max(64, min(2048, w))
56
  return h, w
 
63
  repo_id = MODEL_MAP.get(model_choice, list(MODEL_MAP.values())[0])
64
  torch_dtype = "bfloat16" if DEVICE == "cuda" else "float32"
65
 
 
 
 
66
  pipe, up = load_pipes(repo_id, torch_dtype_str=torch_dtype)
67
 
68
  height, width = sanitize_size(height, width)
 
71
 
72
  generator = torch.Generator(device=DEVICE).manual_seed(int(seed) if seed else random.randint(0, 2**31 - 1))
73
 
 
74
  conditions = []
75
  if conditioning_file is not None:
76
  tmp = tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(conditioning_file.name)[1])
77
  tmp.write(conditioning_file.read())
78
  tmp.flush()
79
  tmp.close()
 
80
  try:
81
  img = load_image(tmp.name)
82
  video_cond = export_to_video([img])
83
  video = load_video(video_cond)
84
  except Exception:
85
  video = load_video(tmp.name)
 
86
  conditions.append((video, 0))
87
 
 
88
  from diffusers.pipelines.ltx.pipeline_ltx_condition import LTXVideoCondition
89
  ltx_conditions = []
90
  for vid, frame_idx in conditions:
 
92
 
93
  negative_prompt = "worst quality, inconsistent motion, blurry, jittery, distorted"
94
 
 
95
  downscale = 2 / 3
96
  down_h, down_w = int(height * downscale), int(width * downscale)
97
  latents = pipe(
 
106
  output_type="latent",
107
  ).frames
108
 
 
109
  if up is not None:
110
  upscaled_latents = up(latents=latents, output_type="latent").frames
111
  else:
112
  upscaled_latents = latents
113
 
 
114
  denoise_strength = 0.4
115
  final_frames = pipe(
116
  conditions=ltx_conditions or None,
 
128
  output_type="pil",
129
  ).frames[0]
130
 
 
131
  final_frames = [f.resize((width, height)) for f in final_frames]
132
 
133
  out_path = os.path.join(tempfile.gettempdir(), f"ltx_out_{random.randint(0,999999)}.mp4")
 
137
 
138
 
139
  with gr.Blocks(title="LTX-Video — Image/Video → Video") as demo:
140
+ gr.Markdown("# LTX-Video (Lightricks) — improved memory Space\nUpload an image or a short video to condition on, write an English prompt and press Generate. GPU highly recommended.")
141
 
142
  with gr.Row():
143
  with gr.Column(scale=3):
 
147
  with gr.Column(scale=1):
148
  height = gr.Number(label="Height", value=480)
149
  width = gr.Number(label="Width", value=832)
150
+ num_frames = gr.Number(label="Num frames", value=16)
151
+ steps = gr.Number(label="Inference steps", value=20)
152
  seed = gr.Number(label="Seed (optional)", value=0)
153
  generate_btn = gr.Button("Generate")
154
 
 
158
  generate_btn.click(fn=generate, inputs=[prompt, conditioning, height, width, num_frames, steps, seed, model_choice], outputs=[out_video, status])
159
 
160
  if __name__ == "__main__":
161
+ os.makedirs("./offload", exist_ok=True) # create the offload folder
162
  demo.launch()