Spaces:
Paused
Paused
alex commited on
Commit ·
fc134c2
1
Parent(s): f8c25ce
allow loRA
Browse files
app.py
CHANGED
|
@@ -30,6 +30,8 @@ from ltx_pipelines.utils.constants import (
|
|
| 30 |
DEFAULT_FRAME_RATE,
|
| 31 |
DEFAULT_LORA_STRENGTH,
|
| 32 |
)
|
|
|
|
|
|
|
| 33 |
|
| 34 |
|
| 35 |
MAX_SEED = np.iinfo(np.int32).max
|
|
@@ -182,24 +184,48 @@ print("Loading LTX-2 Distilled pipeline...")
|
|
| 182 |
print("=" * 80)
|
| 183 |
|
| 184 |
checkpoint_path = get_hub_or_local_checkpoint(DEFAULT_REPO_ID, DEFAULT_CHECKPOINT_FILENAME)
|
| 185 |
-
distilled_lora_path = get_hub_or_local_checkpoint(DEFAULT_REPO_ID, DEFAULT_DISTILLED_LORA_FILENAME)
|
| 186 |
spatial_upsampler_path = get_hub_or_local_checkpoint(DEFAULT_REPO_ID, DEFAULT_SPATIAL_UPSAMPLER_FILENAME)
|
| 187 |
|
| 188 |
print(f"Initializing pipeline with:")
|
| 189 |
print(f" checkpoint_path={checkpoint_path}")
|
| 190 |
-
print(f" distilled_lora_path={distilled_lora_path}")
|
| 191 |
print(f" spatial_upsampler_path={spatial_upsampler_path}")
|
| 192 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 193 |
|
| 194 |
# Load distilled LoRA as a regular LoRA
|
| 195 |
loras = [
|
|
|
|
| 196 |
LoraPathStrengthAndSDOps(
|
| 197 |
path=distilled_lora_path,
|
| 198 |
strength=DEFAULT_LORA_STRENGTH,
|
| 199 |
sd_ops=LTXV_LORA_COMFY_RENAMING_MAP,
|
| 200 |
-
)
|
|
|
|
|
|
|
|
|
|
| 201 |
]
|
| 202 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 203 |
# Initialize pipeline WITHOUT text encoder (gemma_root=None)
|
| 204 |
# Text encoding will be done by external space
|
| 205 |
pipeline = DistilledPipeline(
|
|
@@ -222,23 +248,6 @@ print("=" * 80)
|
|
| 222 |
print("Pipeline fully loaded and ready!")
|
| 223 |
print("=" * 80)
|
| 224 |
|
| 225 |
-
def get_duration(
|
| 226 |
-
input_image,
|
| 227 |
-
prompt,
|
| 228 |
-
duration,
|
| 229 |
-
enhance_prompt,
|
| 230 |
-
seed,
|
| 231 |
-
randomize_seed,
|
| 232 |
-
height,
|
| 233 |
-
width,
|
| 234 |
-
progress
|
| 235 |
-
):
|
| 236 |
-
if duration <= 5:
|
| 237 |
-
return 80
|
| 238 |
-
elif duration <= 10:
|
| 239 |
-
return 120
|
| 240 |
-
else:
|
| 241 |
-
return 180
|
| 242 |
|
| 243 |
class RadioAnimated(gr.HTML):
|
| 244 |
"""
|
|
@@ -274,41 +283,254 @@ class RadioAnimated(gr.HTML):
|
|
| 274 |
|
| 275 |
js_on_load = r"""
|
| 276 |
(() => {
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 309 |
});
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 310 |
});
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 311 |
})();
|
|
|
|
| 312 |
"""
|
| 313 |
|
| 314 |
super().__init__(
|
|
@@ -318,10 +540,42 @@ class RadioAnimated(gr.HTML):
|
|
| 318 |
**kwargs
|
| 319 |
)
|
| 320 |
|
|
|
|
| 321 |
def generate_video_example(input_image, prompt, duration, progress=gr.Progress(track_tqdm=True)):
|
| 322 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 323 |
|
| 324 |
return output_video
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 325 |
|
| 326 |
@spaces.GPU(duration=get_duration)
|
| 327 |
def generate_video(
|
|
@@ -333,6 +587,7 @@ def generate_video(
|
|
| 333 |
randomize_seed: bool = True,
|
| 334 |
height: int = DEFAULT_1_STAGE_HEIGHT,
|
| 335 |
width: int = DEFAULT_1_STAGE_WIDTH,
|
|
|
|
| 336 |
progress=gr.Progress(track_tqdm=True),
|
| 337 |
):
|
| 338 |
"""
|
|
@@ -346,8 +601,10 @@ def generate_video(
|
|
| 346 |
randomize_seed: If True, a random seed is generated for each run.
|
| 347 |
height: Output video height in pixels.
|
| 348 |
width: Output video width in pixels.
|
|
|
|
| 349 |
progress: Gradio progress tracker.
|
| 350 |
Returns:
|
|
|
|
| 351 |
A tuple of:
|
| 352 |
- output_path: Path to the generated MP4 video file.
|
| 353 |
- seed: The seed used for generation.
|
|
@@ -396,6 +653,20 @@ def generate_video(
|
|
| 396 |
del embeddings, final_prompt, status
|
| 397 |
torch.cuda.empty_cache()
|
| 398 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 399 |
# Run inference - progress automatically tracks tqdm from pipeline
|
| 400 |
pipeline(
|
| 401 |
prompt=prompt,
|
|
@@ -431,7 +702,42 @@ def apply_duration(duration: str):
|
|
| 431 |
duration_s = int(duration[:-1])
|
| 432 |
return duration_s
|
| 433 |
|
|
|
|
| 434 |
css = """
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 435 |
#col-container {
|
| 436 |
margin: 0 auto;
|
| 437 |
max-width: 1600px;
|
|
@@ -570,6 +876,176 @@ css += """
|
|
| 570 |
}
|
| 571 |
"""
|
| 572 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 573 |
|
| 574 |
with gr.Blocks(title="LTX-2 Video Distilled 🎥🔈") as demo:
|
| 575 |
gr.HTML(
|
|
@@ -605,12 +1081,19 @@ with gr.Blocks(title="LTX-2 Video Distilled 🎥🔈") as demo:
|
|
| 605 |
height=512
|
| 606 |
)
|
| 607 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 608 |
prompt = gr.Textbox(
|
| 609 |
label="Prompt",
|
| 610 |
value="Make this image come alive with cinematic motion, smooth animation",
|
| 611 |
lines=3,
|
| 612 |
max_lines=3,
|
| 613 |
-
placeholder="Describe the motion and animation you want..."
|
|
|
|
| 614 |
)
|
| 615 |
|
| 616 |
enhance_prompt = gr.Checkbox(
|
|
@@ -633,10 +1116,9 @@ with gr.Blocks(title="LTX-2 Video Distilled 🎥🔈") as demo:
|
|
| 633 |
|
| 634 |
with gr.Column(elem_id="step-column"):
|
| 635 |
output_video = gr.Video(label="Generated Video", autoplay=True, height=512)
|
| 636 |
-
|
| 637 |
-
with gr.Row():
|
| 638 |
-
|
| 639 |
-
with gr.Column():
|
| 640 |
radioanimated_duration = RadioAnimated(
|
| 641 |
choices=["3s", "5s", "10s", "15s"],
|
| 642 |
value="3s",
|
|
@@ -651,8 +1133,7 @@ with gr.Blocks(title="LTX-2 Video Distilled 🎥🔈") as demo:
|
|
| 651 |
step=0.1,
|
| 652 |
visible=False
|
| 653 |
)
|
| 654 |
-
|
| 655 |
-
with gr.Column():
|
| 656 |
radioanimated_resolution = RadioAnimated(
|
| 657 |
choices=["768x512", "512x512", "512x768"],
|
| 658 |
value=f"{DEFAULT_1_STAGE_WIDTH}x{DEFAULT_1_STAGE_HEIGHT}",
|
|
@@ -661,10 +1142,30 @@ with gr.Blocks(title="LTX-2 Video Distilled 🎥🔈") as demo:
|
|
| 661 |
|
| 662 |
width = gr.Number(label="Width", value=DEFAULT_1_STAGE_WIDTH, precision=0, visible=False)
|
| 663 |
height = gr.Number(label="Height", value=DEFAULT_1_STAGE_HEIGHT, precision=0, visible=False)
|
| 664 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 665 |
|
| 666 |
generate_btn = gr.Button("🤩 Generate Video", variant="primary", elem_classes="button-gradient")
|
| 667 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 668 |
|
| 669 |
radioanimated_duration.change(
|
| 670 |
fn=apply_duration,
|
|
@@ -678,6 +1179,13 @@ with gr.Blocks(title="LTX-2 Video Distilled 🎥🔈") as demo:
|
|
| 678 |
outputs=[width, height],
|
| 679 |
api_visibility="private"
|
| 680 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 681 |
|
| 682 |
generate_btn.click(
|
| 683 |
fn=generate_video,
|
|
@@ -690,6 +1198,7 @@ with gr.Blocks(title="LTX-2 Video Distilled 🎥🔈") as demo:
|
|
| 690 |
randomize_seed,
|
| 691 |
height,
|
| 692 |
width,
|
|
|
|
| 693 |
],
|
| 694 |
outputs=[output_video,seed]
|
| 695 |
)
|
|
@@ -716,7 +1225,7 @@ with gr.Blocks(title="LTX-2 Video Distilled 🎥🔈") as demo:
|
|
| 716 |
|
| 717 |
],
|
| 718 |
fn=generate_video_example,
|
| 719 |
-
inputs=[input_image,
|
| 720 |
outputs = [output_video],
|
| 721 |
label="Example",
|
| 722 |
cache_examples=True,
|
|
|
|
| 30 |
DEFAULT_FRAME_RATE,
|
| 31 |
DEFAULT_LORA_STRENGTH,
|
| 32 |
)
|
| 33 |
+
from ltx_core.loader.single_gpu_model_builder import set_lora_enabled
|
| 34 |
+
|
| 35 |
|
| 36 |
|
| 37 |
MAX_SEED = np.iinfo(np.int32).max
|
|
|
|
| 184 |
print("=" * 80)
|
| 185 |
|
| 186 |
checkpoint_path = get_hub_or_local_checkpoint(DEFAULT_REPO_ID, DEFAULT_CHECKPOINT_FILENAME)
|
|
|
|
| 187 |
spatial_upsampler_path = get_hub_or_local_checkpoint(DEFAULT_REPO_ID, DEFAULT_SPATIAL_UPSAMPLER_FILENAME)
|
| 188 |
|
| 189 |
print(f"Initializing pipeline with:")
|
| 190 |
print(f" checkpoint_path={checkpoint_path}")
|
|
|
|
| 191 |
print(f" spatial_upsampler_path={spatial_upsampler_path}")
|
| 192 |
|
| 193 |
+
distilled_lora_path = get_hub_or_local_checkpoint(
|
| 194 |
+
DEFAULT_REPO_ID,
|
| 195 |
+
DEFAULT_DISTILLED_LORA_FILENAME,
|
| 196 |
+
)
|
| 197 |
+
|
| 198 |
+
dolly_in_lora_path = get_hub_or_local_checkpoint(
|
| 199 |
+
"Lightricks/LTX-2-19b-LoRA-Camera-Control-Dolly-In",
|
| 200 |
+
"ltx-2-19b-lora-camera-control-dolly-in.safetensors",
|
| 201 |
+
)
|
| 202 |
+
dolly_out_lora_path = get_hub_or_local_checkpoint(
|
| 203 |
+
"Lightricks/LTX-2-19b-LoRA-Camera-Control-Dolly-Out",
|
| 204 |
+
"ltx-2-19b-lora-camera-control-dolly-out.safetensors",
|
| 205 |
+
)
|
| 206 |
+
|
| 207 |
|
| 208 |
# Load distilled LoRA as a regular LoRA
|
| 209 |
loras = [
|
| 210 |
+
# --- fused / base behavior ---
|
| 211 |
LoraPathStrengthAndSDOps(
|
| 212 |
path=distilled_lora_path,
|
| 213 |
strength=DEFAULT_LORA_STRENGTH,
|
| 214 |
sd_ops=LTXV_LORA_COMFY_RENAMING_MAP,
|
| 215 |
+
),
|
| 216 |
+
# # --- runtime-toggle camera controls ---#
|
| 217 |
+
LoraPathStrengthAndSDOps(dolly_in_lora_path, DEFAULT_LORA_STRENGTH, LTXV_LORA_COMFY_RENAMING_MAP),
|
| 218 |
+
LoraPathStrengthAndSDOps(dolly_out_lora_path, DEFAULT_LORA_STRENGTH, LTXV_LORA_COMFY_RENAMING_MAP),
|
| 219 |
]
|
| 220 |
|
| 221 |
+
# Runtime-toggle LoRAs (exclude fused distilled at index 0)
|
| 222 |
+
RUNTIME_LORA_CHOICES = [
|
| 223 |
+
("No LoRA", -1),
|
| 224 |
+
("Dolly In", 0),
|
| 225 |
+
("Dolly Out", 1),
|
| 226 |
+
]
|
| 227 |
+
|
| 228 |
+
|
| 229 |
# Initialize pipeline WITHOUT text encoder (gemma_root=None)
|
| 230 |
# Text encoding will be done by external space
|
| 231 |
pipeline = DistilledPipeline(
|
|
|
|
| 248 |
print("Pipeline fully loaded and ready!")
|
| 249 |
print("=" * 80)
|
| 250 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 251 |
|
| 252 |
class RadioAnimated(gr.HTML):
|
| 253 |
"""
|
|
|
|
| 283 |
|
| 284 |
js_on_load = r"""
|
| 285 |
(() => {
|
| 286 |
+
const wrap = element.querySelector('.ra-wrap');
|
| 287 |
+
const inner = element.querySelector('.ra-inner');
|
| 288 |
+
const highlight = element.querySelector('.ra-highlight');
|
| 289 |
+
const inputs = Array.from(element.querySelectorAll('.ra-input'));
|
| 290 |
+
const labels = Array.from(element.querySelectorAll('.ra-label'));
|
| 291 |
+
|
| 292 |
+
if (!inputs.length || !labels.length) return;
|
| 293 |
+
|
| 294 |
+
const choices = inputs.map(i => i.value);
|
| 295 |
+
const PAD = 6; // must match .ra-inner padding and .ra-highlight top/left
|
| 296 |
+
|
| 297 |
+
let currentIdx = 0;
|
| 298 |
+
|
| 299 |
+
function setHighlightByIndex(idx) {
|
| 300 |
+
currentIdx = idx;
|
| 301 |
+
|
| 302 |
+
const lbl = labels[idx];
|
| 303 |
+
if (!lbl) return;
|
| 304 |
+
|
| 305 |
+
const innerRect = inner.getBoundingClientRect();
|
| 306 |
+
const lblRect = lbl.getBoundingClientRect();
|
| 307 |
+
|
| 308 |
+
// width matches the label exactly
|
| 309 |
+
highlight.style.width = `${lblRect.width}px`;
|
| 310 |
+
|
| 311 |
+
// highlight has left: 6px, so subtract PAD to align
|
| 312 |
+
const x = (lblRect.left - innerRect.left - PAD);
|
| 313 |
+
highlight.style.transform = `translateX(${x}px)`;
|
| 314 |
+
}
|
| 315 |
+
|
| 316 |
+
function setCheckedByValue(val, shouldTrigger=false) {
|
| 317 |
+
const idx = Math.max(0, choices.indexOf(val));
|
| 318 |
+
inputs.forEach((inp, i) => { inp.checked = (i === idx); });
|
| 319 |
+
|
| 320 |
+
// Wait a frame in case fonts/layout settle (prevents rare drift)
|
| 321 |
+
requestAnimationFrame(() => setHighlightByIndex(idx));
|
| 322 |
+
|
| 323 |
+
props.value = choices[idx];
|
| 324 |
+
if (shouldTrigger) trigger('change', props.value);
|
| 325 |
+
}
|
| 326 |
+
|
| 327 |
+
// Init
|
| 328 |
+
setCheckedByValue(props.value ?? choices[0], false);
|
| 329 |
+
|
| 330 |
+
// Input handlers
|
| 331 |
+
inputs.forEach((inp) => {
|
| 332 |
+
inp.addEventListener('change', () => setCheckedByValue(inp.value, true));
|
| 333 |
});
|
| 334 |
+
|
| 335 |
+
// Recalc on resize (important in Gradio layouts)
|
| 336 |
+
window.addEventListener('resize', () => setHighlightByIndex(currentIdx));
|
| 337 |
+
})();
|
| 338 |
+
|
| 339 |
+
"""
|
| 340 |
+
|
| 341 |
+
super().__init__(
|
| 342 |
+
value=value,
|
| 343 |
+
html_template=html_template,
|
| 344 |
+
js_on_load=js_on_load,
|
| 345 |
+
**kwargs
|
| 346 |
+
)
|
| 347 |
+
|
| 348 |
+
|
| 349 |
+
class PromptBox(gr.HTML):
|
| 350 |
+
"""
|
| 351 |
+
DeepSite-like prompt box (HTML textarea) that behaves like an input component.
|
| 352 |
+
Outputs: the current text value (string)
|
| 353 |
+
"""
|
| 354 |
+
def __init__(self, value="", placeholder="Describe the video with audio you want to generate...", **kwargs):
|
| 355 |
+
uid = uuid.uuid4().hex[:8]
|
| 356 |
+
|
| 357 |
+
html_template = f"""
|
| 358 |
+
<div style="text-align:center; font-weight:600; margin-bottom:6px;">
|
| 359 |
+
Prompt
|
| 360 |
+
</div>
|
| 361 |
+
<div class="ds-prompt" data-ds="{uid}">
|
| 362 |
+
<textarea class="ds-textarea" rows="3"
|
| 363 |
+
placeholder="{placeholder}"></textarea>
|
| 364 |
+
</div>
|
| 365 |
+
"""
|
| 366 |
+
|
| 367 |
+
js_on_load = r"""
|
| 368 |
+
(() => {
|
| 369 |
+
const textarea = element.querySelector(".ds-textarea");
|
| 370 |
+
if (!textarea) return;
|
| 371 |
+
|
| 372 |
+
// Auto-resize (optional, but nice)
|
| 373 |
+
const autosize = () => {
|
| 374 |
+
textarea.style.height = "0px";
|
| 375 |
+
textarea.style.height = Math.min(textarea.scrollHeight, 240) + "px";
|
| 376 |
+
};
|
| 377 |
+
|
| 378 |
+
// Set initial value from props.value
|
| 379 |
+
const setValue = (v, triggerChange=false) => {
|
| 380 |
+
const val = (v ?? "");
|
| 381 |
+
if (textarea.value !== val) textarea.value = val;
|
| 382 |
+
autosize();
|
| 383 |
+
|
| 384 |
+
props.value = textarea.value;
|
| 385 |
+
if (triggerChange) trigger("change", props.value);
|
| 386 |
+
};
|
| 387 |
+
|
| 388 |
+
setValue(props.value, false);
|
| 389 |
+
|
| 390 |
+
// Update Gradio value on input
|
| 391 |
+
textarea.addEventListener("input", () => {
|
| 392 |
+
autosize();
|
| 393 |
+
props.value = textarea.value;
|
| 394 |
+
trigger("change", props.value);
|
| 395 |
});
|
| 396 |
+
|
| 397 |
+
let last = props.value;
|
| 398 |
+
const syncFromProps = () => {
|
| 399 |
+
if (props.value !== last) {
|
| 400 |
+
last = props.value;
|
| 401 |
+
setValue(last, false); // don't re-trigger change loop
|
| 402 |
+
}
|
| 403 |
+
requestAnimationFrame(syncFromProps);
|
| 404 |
+
};
|
| 405 |
+
requestAnimationFrame(syncFromProps);
|
| 406 |
+
})();
|
| 407 |
+
"""
|
| 408 |
+
|
| 409 |
+
super().__init__(
|
| 410 |
+
value=value,
|
| 411 |
+
html_template=html_template,
|
| 412 |
+
js_on_load=js_on_load,
|
| 413 |
+
**kwargs
|
| 414 |
+
)
|
| 415 |
+
|
| 416 |
+
class CameraDropdown(gr.HTML):
|
| 417 |
+
"""
|
| 418 |
+
Custom dropdown (More-style).
|
| 419 |
+
Outputs: selected option string, e.g. "Dolly Left"
|
| 420 |
+
"""
|
| 421 |
+
def __init__(self, choices, value="None", title="Camera LoRA", **kwargs):
|
| 422 |
+
if not choices:
|
| 423 |
+
raise ValueError("CameraDropdown requires choices.")
|
| 424 |
+
|
| 425 |
+
uid = uuid.uuid4().hex[:8]
|
| 426 |
+
safe_choices = [str(c) for c in choices]
|
| 427 |
+
|
| 428 |
+
items_html = "\n".join(
|
| 429 |
+
f"""<button type="button" class="cd-item" data-value="{c}">{c}</button>"""
|
| 430 |
+
for c in safe_choices
|
| 431 |
+
)
|
| 432 |
+
|
| 433 |
+
html_template = f"""
|
| 434 |
+
<div class="cd-wrap" data-cd="{uid}">
|
| 435 |
+
<button type="button" class="cd-trigger" aria-haspopup="menu" aria-expanded="false">
|
| 436 |
+
<span class="cd-trigger-text">More</span>
|
| 437 |
+
<span class="cd-caret">▾</span>
|
| 438 |
+
</button>
|
| 439 |
+
|
| 440 |
+
<div class="cd-menu" role="menu" aria-hidden="true">
|
| 441 |
+
<div class="cd-title">{title}</div>
|
| 442 |
+
<div class="cd-items">
|
| 443 |
+
{items_html}
|
| 444 |
+
</div>
|
| 445 |
+
</div>
|
| 446 |
+
</div>
|
| 447 |
+
"""
|
| 448 |
+
|
| 449 |
+
js_on_load = r"""
|
| 450 |
+
(() => {
|
| 451 |
+
const wrap = element.querySelector(".cd-wrap");
|
| 452 |
+
const trigger = element.querySelector(".cd-trigger");
|
| 453 |
+
const triggerText = element.querySelector(".cd-trigger-text");
|
| 454 |
+
const menu = element.querySelector(".cd-menu");
|
| 455 |
+
const items = Array.from(element.querySelectorAll(".cd-item"));
|
| 456 |
+
|
| 457 |
+
if (!wrap || !trigger || !menu || !items.length) return;
|
| 458 |
+
|
| 459 |
+
function closeMenu() {
|
| 460 |
+
menu.classList.remove("open");
|
| 461 |
+
trigger.setAttribute("aria-expanded", "false");
|
| 462 |
+
menu.setAttribute("aria-hidden", "true");
|
| 463 |
+
}
|
| 464 |
+
|
| 465 |
+
function openMenu() {
|
| 466 |
+
menu.classList.add("open");
|
| 467 |
+
trigger.setAttribute("aria-expanded", "true");
|
| 468 |
+
menu.setAttribute("aria-hidden", "false");
|
| 469 |
+
}
|
| 470 |
+
|
| 471 |
+
function setValue(val, shouldTrigger = false) {
|
| 472 |
+
const v = (val ?? "None");
|
| 473 |
+
props.value = v;
|
| 474 |
+
triggerText.textContent = v;
|
| 475 |
+
|
| 476 |
+
items.forEach(btn => {
|
| 477 |
+
btn.classList.toggle("selected", btn.dataset.value === v);
|
| 478 |
+
});
|
| 479 |
+
|
| 480 |
+
if (shouldTrigger) trigger("change", props.value);
|
| 481 |
+
}
|
| 482 |
+
|
| 483 |
+
// Toggle menu
|
| 484 |
+
trigger.addEventListener("pointerdown", (e) => {
|
| 485 |
+
e.preventDefault(); // prevents focus/blur weirdness
|
| 486 |
+
e.stopPropagation();
|
| 487 |
+
if (menu.classList.contains("open")) closeMenu();
|
| 488 |
+
else openMenu();
|
| 489 |
+
});
|
| 490 |
+
|
| 491 |
+
// Close on outside interaction (use capture so it wins)
|
| 492 |
+
document.addEventListener("pointerdown", (e) => {
|
| 493 |
+
if (!wrap.contains(e.target)) closeMenu();
|
| 494 |
+
}, true);
|
| 495 |
+
|
| 496 |
+
// Close on ESC
|
| 497 |
+
document.addEventListener("keydown", (e) => {
|
| 498 |
+
if (e.key === "Escape") closeMenu();
|
| 499 |
+
});
|
| 500 |
+
|
| 501 |
+
// Close when focus leaves the dropdown (keyboard users)
|
| 502 |
+
wrap.addEventListener("focusout", (e) => {
|
| 503 |
+
// if the newly-focused element isn't inside wrap, close
|
| 504 |
+
if (!wrap.contains(e.relatedTarget)) closeMenu();
|
| 505 |
+
});
|
| 506 |
+
|
| 507 |
+
// Item selection: use pointerdown so it closes immediately
|
| 508 |
+
items.forEach((btn) => {
|
| 509 |
+
btn.addEventListener("pointerdown", (e) => {
|
| 510 |
+
e.preventDefault();
|
| 511 |
+
e.stopPropagation();
|
| 512 |
+
|
| 513 |
+
// close first so it never "sticks" open
|
| 514 |
+
closeMenu();
|
| 515 |
+
setValue(btn.dataset.value, true);
|
| 516 |
+
});
|
| 517 |
+
});
|
| 518 |
+
|
| 519 |
+
// init
|
| 520 |
+
setValue((props.value ?? "None"), false);
|
| 521 |
+
|
| 522 |
+
// sync from Python updates
|
| 523 |
+
let last = props.value;
|
| 524 |
+
const syncFromProps = () => {
|
| 525 |
+
if (props.value !== last) {
|
| 526 |
+
last = props.value;
|
| 527 |
+
setValue(last, false);
|
| 528 |
+
}
|
| 529 |
+
requestAnimationFrame(syncFromProps);
|
| 530 |
+
};
|
| 531 |
+
requestAnimationFrame(syncFromProps);
|
| 532 |
})();
|
| 533 |
+
|
| 534 |
"""
|
| 535 |
|
| 536 |
super().__init__(
|
|
|
|
| 540 |
**kwargs
|
| 541 |
)
|
| 542 |
|
| 543 |
+
|
| 544 |
def generate_video_example(input_image, prompt, duration, progress=gr.Progress(track_tqdm=True)):
|
| 545 |
+
|
| 546 |
+
output_video, seed = generate_video(
|
| 547 |
+
input_image,
|
| 548 |
+
prompt,
|
| 549 |
+
5, # duration seconds
|
| 550 |
+
True, # enhance_prompt
|
| 551 |
+
42, # seed
|
| 552 |
+
True, # randomize_seed
|
| 553 |
+
DEFAULT_1_STAGE_HEIGHT, # height
|
| 554 |
+
DEFAULT_1_STAGE_WIDTH, # width
|
| 555 |
+
"No LoRA",
|
| 556 |
+
progress
|
| 557 |
+
)
|
| 558 |
|
| 559 |
return output_video
|
| 560 |
+
|
| 561 |
+
def get_duration(
|
| 562 |
+
input_image,
|
| 563 |
+
prompt,
|
| 564 |
+
duration,
|
| 565 |
+
enhance_prompt,
|
| 566 |
+
seed,
|
| 567 |
+
randomize_seed,
|
| 568 |
+
height,
|
| 569 |
+
width,
|
| 570 |
+
camera_lora,
|
| 571 |
+
progress
|
| 572 |
+
):
|
| 573 |
+
if duration <= 5:
|
| 574 |
+
return 80
|
| 575 |
+
elif duration <= 10:
|
| 576 |
+
return 120
|
| 577 |
+
else:
|
| 578 |
+
return 180
|
| 579 |
|
| 580 |
@spaces.GPU(duration=get_duration)
|
| 581 |
def generate_video(
|
|
|
|
| 587 |
randomize_seed: bool = True,
|
| 588 |
height: int = DEFAULT_1_STAGE_HEIGHT,
|
| 589 |
width: int = DEFAULT_1_STAGE_WIDTH,
|
| 590 |
+
camera_lora: str = "No LoRA",
|
| 591 |
progress=gr.Progress(track_tqdm=True),
|
| 592 |
):
|
| 593 |
"""
|
|
|
|
| 601 |
randomize_seed: If True, a random seed is generated for each run.
|
| 602 |
height: Output video height in pixels.
|
| 603 |
width: Output video width in pixels.
|
| 604 |
+
camera_lora: Camera motion control LoRA to apply during generation (enables exactly one at runtime).
|
| 605 |
progress: Gradio progress tracker.
|
| 606 |
Returns:
|
| 607 |
+
|
| 608 |
A tuple of:
|
| 609 |
- output_path: Path to the generated MP4 video file.
|
| 610 |
- seed: The seed used for generation.
|
|
|
|
| 653 |
del embeddings, final_prompt, status
|
| 654 |
torch.cuda.empty_cache()
|
| 655 |
|
| 656 |
+
|
| 657 |
+
# Map dropdown name -> adapter index
|
| 658 |
+
name_to_idx = {name: idx for name, idx in RUNTIME_LORA_CHOICES}
|
| 659 |
+
selected_idx = name_to_idx.get(camera_lora, -1)
|
| 660 |
+
|
| 661 |
+
# Disable all runtime adapters first (0..N-1)
|
| 662 |
+
# N here is len(RUNTIME_LORA_CHOICES)-1 because "None" isn't an adapter
|
| 663 |
+
for i in range(len(RUNTIME_LORA_CHOICES) - 1):
|
| 664 |
+
set_lora_enabled(pipeline._transformer, i, False)
|
| 665 |
+
|
| 666 |
+
# Enable selected one (if any)
|
| 667 |
+
if selected_idx >= 0:
|
| 668 |
+
set_lora_enabled(pipeline._transformer, selected_idx, True)
|
| 669 |
+
|
| 670 |
# Run inference - progress automatically tracks tqdm from pipeline
|
| 671 |
pipeline(
|
| 672 |
prompt=prompt,
|
|
|
|
| 702 |
duration_s = int(duration[:-1])
|
| 703 |
return duration_s
|
| 704 |
|
| 705 |
+
|
| 706 |
css = """
|
| 707 |
+
|
| 708 |
+
/* Make the row behave nicely */
|
| 709 |
+
#controls-row {
|
| 710 |
+
display: flex;
|
| 711 |
+
align-items: center;
|
| 712 |
+
gap: 12px;
|
| 713 |
+
flex-wrap: nowrap; /* or wrap if you prefer on small screens */
|
| 714 |
+
}
|
| 715 |
+
|
| 716 |
+
/* Stop these components from stretching */
|
| 717 |
+
#controls-row > * {
|
| 718 |
+
flex: 0 0 auto !important;
|
| 719 |
+
width: auto !important;
|
| 720 |
+
min-width: 0 !important;
|
| 721 |
+
}
|
| 722 |
+
|
| 723 |
+
#controls-row #camera_lora_ui {
|
| 724 |
+
margin-left: auto !important;
|
| 725 |
+
}
|
| 726 |
+
|
| 727 |
+
/* Gradio HTML components often have an inner wrapper div that is width:100% */
|
| 728 |
+
#camera_lora_ui,
|
| 729 |
+
#camera_lora_ui > div {
|
| 730 |
+
width: fit-content !important;
|
| 731 |
+
}
|
| 732 |
+
|
| 733 |
+
/* Same idea for your radio HTML blocks (optional but helps) */
|
| 734 |
+
#radioanimated_duration,
|
| 735 |
+
#radioanimated_duration > div,
|
| 736 |
+
#radioanimated_resolution,
|
| 737 |
+
#radioanimated_resolution > div {
|
| 738 |
+
width: fit-content !important;
|
| 739 |
+
}
|
| 740 |
+
|
| 741 |
#col-container {
|
| 742 |
margin: 0 auto;
|
| 743 |
max-width: 1600px;
|
|
|
|
| 876 |
}
|
| 877 |
"""
|
| 878 |
|
| 879 |
+
css += """
|
| 880 |
+
/* --- prompt box --- */
|
| 881 |
+
.ds-prompt{
|
| 882 |
+
width: 100%;
|
| 883 |
+
max-width: 720px;
|
| 884 |
+
margin-top: 3px;
|
| 885 |
+
}
|
| 886 |
+
|
| 887 |
+
.ds-textarea{
|
| 888 |
+
width: 100%;
|
| 889 |
+
box-sizing: border-box;
|
| 890 |
+
|
| 891 |
+
background: #2b2b2b;
|
| 892 |
+
color: rgba(255,255,255,0.9);
|
| 893 |
+
|
| 894 |
+
border: 1px solid rgba(255,255,255,0.12);
|
| 895 |
+
border-radius: 14px;
|
| 896 |
+
|
| 897 |
+
padding: 14px 16px;
|
| 898 |
+
outline: none;
|
| 899 |
+
|
| 900 |
+
font-family: ui-sans-serif, system-ui, -apple-system, Segoe UI, Roboto, Arial;
|
| 901 |
+
font-size: 15px;
|
| 902 |
+
line-height: 1.35;
|
| 903 |
+
|
| 904 |
+
resize: none;
|
| 905 |
+
height: 94px;
|
| 906 |
+
min-height: 94px;
|
| 907 |
+
max-height: 94px;
|
| 908 |
+
overflow-y: auto;
|
| 909 |
+
}
|
| 910 |
+
|
| 911 |
+
.ds-textarea::placeholder{
|
| 912 |
+
color: rgba(255,255,255,0.55);
|
| 913 |
+
}
|
| 914 |
+
|
| 915 |
+
.ds-textarea:focus{
|
| 916 |
+
border-color: rgba(255,255,255,0.22);
|
| 917 |
+
box-shadow: 0 0 0 3px rgba(255,255,255,0.06);
|
| 918 |
+
}
|
| 919 |
+
"""
|
| 920 |
+
|
| 921 |
+
css += """
|
| 922 |
+
/* ---- camera dropdown ---- */
|
| 923 |
+
|
| 924 |
+
/* 1) Fix overlap: make the Gradio HTML block shrink-to-fit when it contains a CameraDropdown.
|
| 925 |
+
Gradio uses .gr-html for HTML components in most versions; older themes sometimes use .gradio-html.
|
| 926 |
+
This keeps your big header HTML unaffected because it doesn't contain .cd-wrap.
|
| 927 |
+
*/
|
| 928 |
+
|
| 929 |
+
/* 2) Actual dropdown layout */
|
| 930 |
+
.cd-wrap{
|
| 931 |
+
position: relative;
|
| 932 |
+
display: inline-block;
|
| 933 |
+
}
|
| 934 |
+
|
| 935 |
+
/* 3) Match RadioAnimated pill size/feel */
|
| 936 |
+
.cd-trigger{
|
| 937 |
+
margin-top: 2px;
|
| 938 |
+
display: inline-flex;
|
| 939 |
+
align-items: center;
|
| 940 |
+
justify-content: center;
|
| 941 |
+
gap: 10px;
|
| 942 |
+
|
| 943 |
+
border: none;
|
| 944 |
+
|
| 945 |
+
box-sizing: border-box;
|
| 946 |
+
padding: 10px 18px;
|
| 947 |
+
min-height: 52px;
|
| 948 |
+
line-height: 1.2;
|
| 949 |
+
|
| 950 |
+
border-radius: 9999px;
|
| 951 |
+
background: #0b0b0b;
|
| 952 |
+
|
| 953 |
+
font-family: ui-sans-serif, system-ui, -apple-system, Segoe UI, Roboto, Arial;
|
| 954 |
+
font-size: 14px;
|
| 955 |
+
|
| 956 |
+
/* ✅ match .ra-label exactly */
|
| 957 |
+
color: rgba(255,255,255,0.7) !important;
|
| 958 |
+
font-weight: 600 !important;
|
| 959 |
+
|
| 960 |
+
cursor: pointer;
|
| 961 |
+
user-select: none;
|
| 962 |
+
white-space: nowrap;
|
| 963 |
+
}
|
| 964 |
+
|
| 965 |
+
/* Ensure inner spans match too */
|
| 966 |
+
.cd-trigger .cd-trigger-text,
|
| 967 |
+
.cd-trigger .cd-caret{
|
| 968 |
+
color: rgba(255,255,255,0.7) !important;
|
| 969 |
+
}
|
| 970 |
+
|
| 971 |
+
/* keep caret styling */
|
| 972 |
+
.cd-caret{
|
| 973 |
+
opacity: 0.8;
|
| 974 |
+
font-weight: 900;
|
| 975 |
+
}
|
| 976 |
+
|
| 977 |
+
/* 4) Ensure menu overlays neighbors and isn't clipped */
|
| 978 |
+
.cd-menu{
|
| 979 |
+
position: absolute;
|
| 980 |
+
top: calc(100% + 10px);
|
| 981 |
+
left: 0;
|
| 982 |
+
|
| 983 |
+
min-width: 240px;
|
| 984 |
+
background: #2b2b2b;
|
| 985 |
+
border: 1px solid rgba(255,255,255,0.14);
|
| 986 |
+
border-radius: 14px;
|
| 987 |
+
box-shadow: 0 18px 40px rgba(0,0,0,0.35);
|
| 988 |
+
padding: 10px;
|
| 989 |
+
|
| 990 |
+
opacity: 0;
|
| 991 |
+
transform: translateY(-6px);
|
| 992 |
+
pointer-events: none;
|
| 993 |
+
transition: opacity 160ms ease, transform 160ms ease;
|
| 994 |
+
|
| 995 |
+
z-index: 9999; /* was 50 */
|
| 996 |
+
}
|
| 997 |
+
|
| 998 |
+
.cd-menu.open{
|
| 999 |
+
opacity: 1;
|
| 1000 |
+
transform: translateY(0);
|
| 1001 |
+
pointer-events: auto;
|
| 1002 |
+
}
|
| 1003 |
+
|
| 1004 |
+
.cd-title{
|
| 1005 |
+
padding: 6px 8px 10px 8px;
|
| 1006 |
+
font-size: 12px;
|
| 1007 |
+
font-weight: 800;
|
| 1008 |
+
letter-spacing: 0.02em;
|
| 1009 |
+
color: rgba(255,255,255,0.55);
|
| 1010 |
+
text-transform: none;
|
| 1011 |
+
}
|
| 1012 |
+
|
| 1013 |
+
.cd-items{
|
| 1014 |
+
display: flex;
|
| 1015 |
+
flex-direction: column;
|
| 1016 |
+
gap: 6px;
|
| 1017 |
+
}
|
| 1018 |
+
|
| 1019 |
+
.cd-item{
|
| 1020 |
+
width: 100%;
|
| 1021 |
+
text-align: left;
|
| 1022 |
+
border: none;
|
| 1023 |
+
background: rgba(255,255,255,0.06);
|
| 1024 |
+
color: rgba(255,255,255,0.92);
|
| 1025 |
+
padding: 10px 10px;
|
| 1026 |
+
border-radius: 12px;
|
| 1027 |
+
cursor: pointer;
|
| 1028 |
+
font-size: 14px;
|
| 1029 |
+
font-weight: 700;
|
| 1030 |
+
transition: background 120ms ease, transform 80ms ease;
|
| 1031 |
+
}
|
| 1032 |
+
|
| 1033 |
+
.cd-item:hover{
|
| 1034 |
+
background: rgba(255,255,255,0.10);
|
| 1035 |
+
}
|
| 1036 |
+
|
| 1037 |
+
.cd-item:active{
|
| 1038 |
+
transform: translateY(1px);
|
| 1039 |
+
}
|
| 1040 |
+
|
| 1041 |
+
.cd-item.selected{
|
| 1042 |
+
background: rgba(139,255,151,0.22);
|
| 1043 |
+
border: 1px solid rgba(139,255,151,0.35);
|
| 1044 |
+
}
|
| 1045 |
+
|
| 1046 |
+
"""
|
| 1047 |
+
|
| 1048 |
+
|
| 1049 |
|
| 1050 |
with gr.Blocks(title="LTX-2 Video Distilled 🎥🔈") as demo:
|
| 1051 |
gr.HTML(
|
|
|
|
| 1081 |
height=512
|
| 1082 |
)
|
| 1083 |
|
| 1084 |
+
|
| 1085 |
+
prompt_ui = PromptBox(
|
| 1086 |
+
value="Make this image come alive with cinematic motion, smooth animation",
|
| 1087 |
+
elem_id="prompt_ui",
|
| 1088 |
+
)
|
| 1089 |
+
|
| 1090 |
prompt = gr.Textbox(
|
| 1091 |
label="Prompt",
|
| 1092 |
value="Make this image come alive with cinematic motion, smooth animation",
|
| 1093 |
lines=3,
|
| 1094 |
max_lines=3,
|
| 1095 |
+
placeholder="Describe the motion and animation you want...",
|
| 1096 |
+
visible=False
|
| 1097 |
)
|
| 1098 |
|
| 1099 |
enhance_prompt = gr.Checkbox(
|
|
|
|
| 1116 |
|
| 1117 |
with gr.Column(elem_id="step-column"):
|
| 1118 |
output_video = gr.Video(label="Generated Video", autoplay=True, height=512)
|
| 1119 |
+
|
| 1120 |
+
with gr.Row(elem_id="controls-row"):
|
| 1121 |
+
|
|
|
|
| 1122 |
radioanimated_duration = RadioAnimated(
|
| 1123 |
choices=["3s", "5s", "10s", "15s"],
|
| 1124 |
value="3s",
|
|
|
|
| 1133 |
step=0.1,
|
| 1134 |
visible=False
|
| 1135 |
)
|
| 1136 |
+
|
|
|
|
| 1137 |
radioanimated_resolution = RadioAnimated(
|
| 1138 |
choices=["768x512", "512x512", "512x768"],
|
| 1139 |
value=f"{DEFAULT_1_STAGE_WIDTH}x{DEFAULT_1_STAGE_HEIGHT}",
|
|
|
|
| 1142 |
|
| 1143 |
width = gr.Number(label="Width", value=DEFAULT_1_STAGE_WIDTH, precision=0, visible=False)
|
| 1144 |
height = gr.Number(label="Height", value=DEFAULT_1_STAGE_HEIGHT, precision=0, visible=False)
|
| 1145 |
+
|
| 1146 |
+
camera_lora_ui = CameraDropdown(
|
| 1147 |
+
choices=[name for name, _ in RUNTIME_LORA_CHOICES],
|
| 1148 |
+
value="No LoRA",
|
| 1149 |
+
title="Camera LoRA",
|
| 1150 |
+
elem_id="camera_lora_ui",
|
| 1151 |
+
)
|
| 1152 |
+
|
| 1153 |
+
# Hidden real dropdown (backend value)
|
| 1154 |
+
camera_lora = gr.Dropdown(
|
| 1155 |
+
label="Camera Control LoRA",
|
| 1156 |
+
choices=[name for name, _ in RUNTIME_LORA_CHOICES],
|
| 1157 |
+
value="No LoRA",
|
| 1158 |
+
visible=False
|
| 1159 |
+
)
|
| 1160 |
|
| 1161 |
generate_btn = gr.Button("🤩 Generate Video", variant="primary", elem_classes="button-gradient")
|
| 1162 |
|
| 1163 |
+
camera_lora_ui.change(
|
| 1164 |
+
fn=lambda x: x,
|
| 1165 |
+
inputs=camera_lora_ui,
|
| 1166 |
+
outputs=camera_lora,
|
| 1167 |
+
api_visibility="private"
|
| 1168 |
+
)
|
| 1169 |
|
| 1170 |
radioanimated_duration.change(
|
| 1171 |
fn=apply_duration,
|
|
|
|
| 1179 |
outputs=[width, height],
|
| 1180 |
api_visibility="private"
|
| 1181 |
)
|
| 1182 |
+
prompt_ui.change(
|
| 1183 |
+
fn=lambda x: x,
|
| 1184 |
+
inputs=prompt_ui,
|
| 1185 |
+
outputs=prompt,
|
| 1186 |
+
api_visibility="private"
|
| 1187 |
+
)
|
| 1188 |
+
|
| 1189 |
|
| 1190 |
generate_btn.click(
|
| 1191 |
fn=generate_video,
|
|
|
|
| 1198 |
randomize_seed,
|
| 1199 |
height,
|
| 1200 |
width,
|
| 1201 |
+
camera_lora,
|
| 1202 |
],
|
| 1203 |
outputs=[output_video,seed]
|
| 1204 |
)
|
|
|
|
| 1225 |
|
| 1226 |
],
|
| 1227 |
fn=generate_video_example,
|
| 1228 |
+
inputs=[input_image, prompt_ui],
|
| 1229 |
outputs = [output_video],
|
| 1230 |
label="Example",
|
| 1231 |
cache_examples=True,
|
packages/ltx-core/src/ltx_core/loader/fuse_loras.py
CHANGED
|
@@ -3,6 +3,7 @@ import triton
|
|
| 3 |
|
| 4 |
from ltx_core.loader.kernels import fused_add_round_kernel
|
| 5 |
from ltx_core.loader.primitives import LoraStateDictWithStrength, StateDict
|
|
|
|
| 6 |
|
| 7 |
BLOCK_SIZE = 1024
|
| 8 |
|
|
@@ -59,42 +60,59 @@ def _prepare_deltas(
|
|
| 59 |
return deltas[0]
|
| 60 |
return torch.sum(torch.stack(deltas, dim=0), dim=0)
|
| 61 |
|
| 62 |
-
|
| 63 |
def apply_loras(
|
| 64 |
model_sd: StateDict,
|
| 65 |
lora_sd_and_strengths: list[LoraStateDictWithStrength],
|
| 66 |
dtype: torch.dtype,
|
| 67 |
destination_sd: StateDict | None = None,
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
if destination_sd is not None
|
| 71 |
-
sd = destination_sd.sd
|
| 72 |
size = 0
|
| 73 |
device = torch.device("meta")
|
| 74 |
inner_dtypes = set()
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
for key, weight in model_sd.sd.items():
|
| 76 |
if weight is None:
|
| 77 |
continue
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
device = weight.device
|
| 79 |
target_dtype = dtype if dtype is not None else weight.dtype
|
| 80 |
-
deltas_dtype = target_dtype
|
|
|
|
| 81 |
deltas = _prepare_deltas(lora_sd_and_strengths, key, deltas_dtype, device)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
if deltas is None:
|
| 83 |
if key in sd:
|
| 84 |
continue
|
| 85 |
-
|
| 86 |
-
elif weight.dtype == torch.float8_e4m3fn:
|
| 87 |
-
if str(device).startswith("cuda"):
|
| 88 |
-
deltas = calculate_weight_float8_(deltas, weight)
|
| 89 |
-
else:
|
| 90 |
-
deltas.add_(weight.to(dtype=deltas.dtype, device=device))
|
| 91 |
-
elif weight.dtype == torch.bfloat16:
|
| 92 |
-
deltas.add_(weight)
|
| 93 |
else:
|
| 94 |
-
|
| 95 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 96 |
inner_dtypes.add(target_dtype)
|
| 97 |
-
size +=
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
|
| 4 |
from ltx_core.loader.kernels import fused_add_round_kernel
|
| 5 |
from ltx_core.loader.primitives import LoraStateDictWithStrength, StateDict
|
| 6 |
+
from typing import Iterable
|
| 7 |
|
| 8 |
BLOCK_SIZE = 1024
|
| 9 |
|
|
|
|
| 60 |
return deltas[0]
|
| 61 |
return torch.sum(torch.stack(deltas, dim=0), dim=0)
|
| 62 |
|
|
|
|
| 63 |
def apply_loras(
    model_sd: StateDict,
    lora_sd_and_strengths: list[LoraStateDictWithStrength],
    dtype: torch.dtype,
    destination_sd: StateDict | None = None,
    return_affected: bool = False,
) -> StateDict | tuple[StateDict, list[str]]:
    """Merge LoRA deltas into a model state dict.

    Every tensor in ``model_sd`` is copied into the result (cast to ``dtype``
    when one is given); tensors targeted by at least one LoRA additionally get
    the summed, strength-scaled deltas added on top of the base weight.

    Args:
        model_sd: Base model state dict.
        lora_sd_and_strengths: LoRA state dicts paired with their merge strengths.
        dtype: Target dtype for merged tensors; ``None`` keeps each tensor's own dtype.
        destination_sd: Optional state dict to merge into in place. Keys already
            present in it that receive no deltas are left untouched.
        return_affected: When True, also return the sorted module prefixes
            (weight key minus the ``.weight`` suffix) actually modified by a LoRA.

    Returns:
        The merged ``StateDict``, or ``(state_dict, affected_module_prefixes)``
        when ``return_affected`` is True.
    """
    sd = destination_sd.sd if destination_sd is not None else {}
    size = 0
    device = torch.device("meta")
    inner_dtypes = set()

    affected_weight_keys: list[str] = []
    affected_module_prefixes: set[str] = set()

    # NOTE(review): the float8 (fp8) merge path from the pre-LoRA version was
    # intentionally dropped here — confirm fp8 checkpoints are not used.
    for key, weight in model_sd.sd.items():
        if weight is None:
            continue

        device = weight.device
        target_dtype = dtype if dtype is not None else weight.dtype

        deltas = _prepare_deltas(lora_sd_and_strengths, key, target_dtype, device)

        # Record which modules are actually modified by a LoRA. Only
        # ``*.weight`` keys can yield a module prefix.
        if deltas is not None and key.endswith(".weight"):
            affected_weight_keys.append(key)
            affected_module_prefixes.add(key[: -len(".weight")])

        if deltas is None:
            # No LoRA touches this tensor: keep a preexisting destination copy,
            # otherwise materialize a cast copy of the base tensor.
            # BUGFIX: non-weight tensors (biases, norm params, ...) flow through
            # here too — previously they were skipped entirely, silently
            # dropping them from the rebuilt state dict.
            if key in sd:
                continue
            out = weight.clone().to(dtype=target_dtype, device=device)
        else:
            # merged = sum(strength-scaled deltas) + base weight
            out = deltas.to(dtype=target_dtype)
            out.add_(weight.to(dtype=out.dtype, device=device))

        sd[key] = out
        inner_dtypes.add(target_dtype)
        size += out.nbytes

    result = destination_sd if destination_sd is not None else StateDict(sd, device, size, inner_dtypes)

    if return_affected:
        # Sorted for deterministic downstream iteration order.
        return result, sorted(affected_module_prefixes)

    return result
|
| 118 |
+
|
packages/ltx-core/src/ltx_core/loader/single_gpu_model_builder.py
CHANGED
|
@@ -3,6 +3,7 @@ from dataclasses import dataclass, field, replace
|
|
| 3 |
from typing import Generic
|
| 4 |
|
| 5 |
import torch
|
|
|
|
| 6 |
|
| 7 |
from ltx_core.loader.fuse_loras import apply_loras
|
| 8 |
from ltx_core.loader.module_ops import ModuleOps
|
|
@@ -22,6 +23,109 @@ from ltx_core.model.model_protocol import ModelConfigurator, ModelType
|
|
| 22 |
logger: logging.Logger = logging.getLogger(__name__)
|
| 23 |
|
| 24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
@dataclass(frozen=True)
|
| 26 |
class SingleGPUModelBuilder(Generic[ModelType], ModelBuilderProtocol[ModelType], LoRAAdaptableProtocol):
|
| 27 |
"""
|
|
@@ -93,9 +197,29 @@ class SingleGPUModelBuilder(Generic[ModelType], ModelBuilderProtocol[ModelType],
|
|
| 93 |
]
|
| 94 |
final_sd = apply_loras(
|
| 95 |
model_sd=model_state_dict,
|
| 96 |
-
lora_sd_and_strengths=lora_sd_and_strengths,
|
| 97 |
dtype=dtype,
|
| 98 |
destination_sd=model_state_dict if isinstance(self.registry, DummyRegistry) else None,
|
| 99 |
)
|
| 100 |
meta_model.load_state_dict(final_sd.sd, strict=False, assign=True)
|
| 101 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
from typing import Generic
|
| 4 |
|
| 5 |
import torch
|
| 6 |
+
import torch.nn as nn
|
| 7 |
|
| 8 |
from ltx_core.loader.fuse_loras import apply_loras
|
| 9 |
from ltx_core.loader.module_ops import ModuleOps
|
|
|
|
| 23 |
logger: logging.Logger = logging.getLogger(__name__)
|
| 24 |
|
| 25 |
|
| 26 |
+
def get_submodule_and_parent(root: nn.Module, path: str):
    """Resolve ``path`` under ``root`` and return ``(parent, child_name, child)``.

    ``path`` is a dotted module path as produced by ``named_modules()``;
    numeric segments index into ``nn.Sequential`` / ``nn.ModuleList``.
    """

    def _descend(module, segment):
        # Numeric segments are container indices; everything else is an attribute.
        return module[int(segment)] if segment.isdigit() else getattr(module, segment)

    *ancestors, leaf = path.split(".")
    parent = root
    for segment in ancestors:
        parent = _descend(parent, segment)
    return parent, leaf, _descend(parent, leaf)
|
| 45 |
+
|
| 46 |
+
def set_submodule(root: nn.Module, path: str, new_module: nn.Module):
    """Replace the module located at ``path`` under ``root`` with ``new_module``."""
    parent, leaf, _ = get_submodule_and_parent(root, path)
    if leaf.isdigit():
        # Container child (Sequential / ModuleList): assign by index.
        parent[int(leaf)] = new_module
        return
    setattr(parent, leaf, new_module)
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
class MultiLoraLinear(nn.Module):
    """An ``nn.Linear`` wrapper carrying any number of individually toggleable
    LoRA adapters.

    Each adapter is a low-rank pair ``(A, B)`` with a fixed scale; enabled
    adapters contribute ``((x @ A.T) @ B.T) * scale`` on top of the base
    layer's output.
    """

    def __init__(self, base: nn.Linear):
        super().__init__()
        self.base = base
        # One (A, B, scale) tuple per adapter slot, in registration order.
        self.adapters: list[tuple[torch.Tensor, torch.Tensor, float]] = []
        # Per-slot on/off switch, parallel to ``adapters``.
        self.enabled: list[bool] = []

    def add_adapter(self, A: torch.Tensor, B: torch.Tensor, scale: float, enabled: bool = True):
        """Register one adapter; buffers keep A/B off ``.parameters()`` (inference only)."""
        slot = len(self.adapters)
        self.register_buffer(f"lora_A_{slot}", A, persistent=False)
        self.register_buffer(f"lora_B_{slot}", B, persistent=False)
        self.adapters.append((A, B, float(scale)))
        self.enabled.append(bool(enabled))

    def set_enabled(self, idx: int, enabled: bool):
        """Toggle adapter slot ``idx``; out-of-range indices are ignored."""
        if 0 <= idx < len(self.enabled):
            self.enabled[idx] = enabled

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        result = self.base(x)
        for slot, active in enumerate(self.enabled):
            if active:
                A = getattr(self, f"lora_A_{slot}")
                B = getattr(self, f"lora_B_{slot}")
                result = result + ((x @ A.t()) @ B.t()) * self.adapters[slot][2]
        return result
|
| 84 |
+
|
| 85 |
+
def set_lora_enabled(model: nn.Module, adapter_idx: int, enabled: bool):
    """Toggle adapter slot ``adapter_idx`` on every ``MultiLoraLinear`` in ``model``."""
    wrapped_layers = (m for m in model.modules() if isinstance(m, MultiLoraLinear))
    for layer in wrapped_layers:
        layer.set_enabled(adapter_idx, enabled)
|
| 89 |
+
|
| 90 |
+
def patch_only_affected_linears(
    model: nn.Module,
    lora_sd: dict,
    affected_modules: list[str],
    strength: float,
    adapter_idx: int,
    default_enabled: bool = False,
):
    """Wrap the LoRA-affected ``nn.Linear`` modules with ``MultiLoraLinear``
    and register this LoRA as one adapter slot on each of them.

    Args:
        model: Model patched in place.
        lora_sd: Raw LoRA state dict; expects ``<prefix>.lora_A.weight`` /
            ``<prefix>.lora_B.weight`` keys.
        affected_modules: Module prefixes any LoRA can touch (same list must be
            passed for every adapter so slots line up).
        strength: Scale applied to this adapter's delta.
        adapter_idx: Slot this adapter should occupy on every wrapped layer.
        default_enabled: Initial on/off state of the adapter.
    """
    for prefix in affected_modules:
        _, _, mod = get_submodule_and_parent(model, prefix)

        # Wrap once; reuse the wrapper if a previous adapter already patched it.
        if isinstance(mod, MultiLoraLinear):
            wrapped = mod
        else:
            if not isinstance(mod, nn.Linear):
                continue
            wrapped = MultiLoraLinear(mod)
            set_submodule(model, prefix, wrapped)

        base_device = wrapped.base.weight.device
        base_dtype = wrapped.base.weight.dtype

        key_a = f"{prefix}.lora_A.weight"
        key_b = f"{prefix}.lora_B.weight"
        if key_a not in lora_sd or key_b not in lora_sd:
            # BUGFIX: this LoRA does not touch the layer, but a placeholder
            # must still be registered so slot ``adapter_idx`` refers to the
            # SAME adapter on every wrapped layer. Previously this branch
            # skipped registration entirely, so adapter indices drifted and
            # set_lora_enabled(model, idx, ...) toggled different adapters on
            # different layers. A rank-1 zero pair is inert and stays disabled.
            zero_a = torch.zeros(1, wrapped.base.in_features, device=base_device, dtype=base_dtype)
            zero_b = torch.zeros(wrapped.base.out_features, 1, device=base_device, dtype=base_dtype)
            wrapped.add_adapter(zero_a, zero_b, scale=0.0, enabled=False)
            continue

        A = lora_sd[key_a].to(device=base_device, dtype=base_dtype)
        B = lora_sd[key_b].to(device=base_device, dtype=base_dtype)

        # Parity with the merged-weight path: plain strength, no rank scaling.
        wrapped.add_adapter(A, B, scale=strength, enabled=default_enabled)
|
| 127 |
+
|
| 128 |
+
|
| 129 |
@dataclass(frozen=True)
|
| 130 |
class SingleGPUModelBuilder(Generic[ModelType], ModelBuilderProtocol[ModelType], LoRAAdaptableProtocol):
|
| 131 |
"""
|
|
|
|
| 197 |
]
|
| 198 |
final_sd = apply_loras(
|
| 199 |
model_sd=model_state_dict,
|
| 200 |
+
lora_sd_and_strengths=[lora_sd_and_strengths[0]],
|
| 201 |
dtype=dtype,
|
| 202 |
destination_sd=model_state_dict if isinstance(self.registry, DummyRegistry) else None,
|
| 203 |
)
|
| 204 |
meta_model.load_state_dict(final_sd.sd, strict=False, assign=True)
|
| 205 |
+
model = self._return_model(meta_model, device)
|
| 206 |
+
|
| 207 |
+
_, affected_modules = apply_loras(
|
| 208 |
+
model_sd=model_state_dict,
|
| 209 |
+
lora_sd_and_strengths=lora_sd_and_strengths,
|
| 210 |
+
dtype=dtype,
|
| 211 |
+
destination_sd=None,
|
| 212 |
+
return_affected=True,
|
| 213 |
+
)
|
| 214 |
+
|
| 215 |
+
for runtime_idx, (lora_sd, strength) in enumerate(zip(lora_state_dicts[1:], lora_strengths[1:], strict=True)):
|
| 216 |
+
patch_only_affected_linears(
|
| 217 |
+
model,
|
| 218 |
+
lora_sd.sd,
|
| 219 |
+
affected_modules,
|
| 220 |
+
strength=strength,
|
| 221 |
+
adapter_idx=runtime_idx,
|
| 222 |
+
default_enabled=False, # start off
|
| 223 |
+
)
|
| 224 |
+
|
| 225 |
+
return model
|