ltx-2-distilled

Paused

App Files Files Community

linoyts HF Staff committed on Jan 6

Commit

ade6776

verified ·

1 Parent(s): afd2ca0

improvements + fixes [wip]

Browse files

Files changed (1) hide show

app.py +64 -7

app.py CHANGED Viewed

@@ -8,10 +8,14 @@ sys.path.insert(0, str(current_dir / "packages" / "ltx-core" / "src"))
 import spaces
 import gradio as gr
 import numpy as np
 import random
 from typing import Optional
 from huggingface_hub import hf_hub_download
 from ltx_pipelines.distilled import DistilledPipeline
 from ltx_core.tiling import TilingConfig
 from ltx_core.loader.primitives import LoraPathStrengthAndSDOps
@@ -31,11 +35,13 @@ DEFAULT_PROMPT = "An astronaut hatches from a fragile egg on the surface of the
 # HuggingFace Hub defaults
 DEFAULT_REPO_ID = "Lightricks/LTX-2"
-DEFAULT_GEMMA_REPO_ID = "google/gemma-3-12b-it-qat-q4_0-unquantized"
 DEFAULT_CHECKPOINT_FILENAME = "ltx-2-19b-dev-fp8.safetensors"
 DEFAULT_DISTILLED_LORA_FILENAME = "ltx-2-19b-distilled-lora-384.safetensors"
 DEFAULT_SPATIAL_UPSAMPLER_FILENAME = "ltx-2-spatial-upscaler-x2-1.0.safetensors"
 def get_hub_or_local_checkpoint(repo_id: Optional[str] = None, filename: Optional[str] = None):
     """Download from HuggingFace Hub or use local checkpoint."""
     if repo_id is None and filename is None:
@@ -66,7 +72,7 @@ print(f"Initializing pipeline with:")
 print(f"  checkpoint_path={checkpoint_path}")
 print(f"  distilled_lora_path={distilled_lora_path}")
 print(f"  spatial_upsampler_path={spatial_upsampler_path}")
-print(f"  gemma_root={DEFAULT_GEMMA_REPO_ID}")
 # Load distilled LoRA as a regular LoRA
 loras = [
@@ -77,15 +83,26 @@ loras = [
     )
 ]
 pipeline = DistilledPipeline(
     checkpoint_path=checkpoint_path,
     spatial_upsampler_path=spatial_upsampler_path,
-    gemma_root=DEFAULT_GEMMA_REPO_ID,
     loras=loras,
     fp8transformer=True,
     local_files_only=False,
 )
 print("=" * 80)
 print("Pipeline fully loaded and ready!")
 print("=" * 80)
@@ -113,20 +130,58 @@ def generate_video(
         # Create output directory if it doesn't exist
         output_dir = Path("outputs")
         output_dir.mkdir(exist_ok=True)
-        output_path = output_dir / f"video_{seed}.mp4"
         # Handle image input
         images = []
         if input_image is not None:
             # Save uploaded image temporarily
-            temp_image_path = output_dir / f"temp_input_{seed}.jpg"
             if hasattr(input_image, 'save'):
                 input_image.save(temp_image_path)
             else:
                 # If it's a file path already
-                temp_image_path = input_image
             # Format: (image_path, frame_idx, strength)
             images = [(str(temp_image_path), 0, 1.0)]
         # Run inference - progress automatically tracks tqdm from pipeline
         pipeline(
@@ -139,6 +194,8 @@ def generate_video(
             frame_rate=frame_rate,
             images=images,
             tiling_config=TilingConfig.default(),
         )
         return str(output_path), current_seed
@@ -250,4 +307,4 @@ css = '''
 .gradio-container .contain{max-width: 1200px !important; margin: 0 auto !important}
 '''
 if __name__ == "__main__":
-    demo.launch(theme=gr.themes.Citrus(), css=css)

 import spaces
 import gradio as gr
+from gradio_client import Client, handle_file
 import numpy as np
 import random
+import torch
 from typing import Optional
+from pathlib import Path
 from huggingface_hub import hf_hub_download
+from gradio_client import Client
 from ltx_pipelines.distilled import DistilledPipeline
 from ltx_core.tiling import TilingConfig
 from ltx_core.loader.primitives import LoraPathStrengthAndSDOps
 # HuggingFace Hub defaults
 DEFAULT_REPO_ID = "Lightricks/LTX-2"
 DEFAULT_CHECKPOINT_FILENAME = "ltx-2-19b-dev-fp8.safetensors"
 DEFAULT_DISTILLED_LORA_FILENAME = "ltx-2-19b-distilled-lora-384.safetensors"
 DEFAULT_SPATIAL_UPSAMPLER_FILENAME = "ltx-2-spatial-upscaler-x2-1.0.safetensors"
+# Text encoder space URL
+TEXT_ENCODER_SPACE = "linoyts/gemma-text-encoder"
 def get_hub_or_local_checkpoint(repo_id: Optional[str] = None, filename: Optional[str] = None):
     """Download from HuggingFace Hub or use local checkpoint."""
     if repo_id is None and filename is None:
 print(f"  checkpoint_path={checkpoint_path}")
 print(f"  distilled_lora_path={distilled_lora_path}")
 print(f"  spatial_upsampler_path={spatial_upsampler_path}")
+print(f"  text_encoder_space={TEXT_ENCODER_SPACE}")
 # Load distilled LoRA as a regular LoRA
 loras = [
     )
 ]
+# Initialize pipeline WITHOUT text encoder (gemma_root=None)
+# Text encoding will be done by external space
 pipeline = DistilledPipeline(
     checkpoint_path=checkpoint_path,
     spatial_upsampler_path=spatial_upsampler_path,
+    gemma_root=None,  # No text encoder in this space
     loras=loras,
     fp8transformer=True,
     local_files_only=False,
 )
+# Initialize text encoder client
+print(f"Connecting to text encoder space: {TEXT_ENCODER_SPACE}")
+try:
+    text_encoder_client = Client(TEXT_ENCODER_SPACE)
+    print("✓ Text encoder client connected!")
+except Exception as e:
+    print(f"⚠ Warning: Could not connect to text encoder space: {e}")
+    text_encoder_client = None
 print("=" * 80)
 print("Pipeline fully loaded and ready!")
 print("=" * 80)
         # Create output directory if it doesn't exist
         output_dir = Path("outputs")
         output_dir.mkdir(exist_ok=True)
+        output_path = output_dir / f"video_{current_seed}.mp4"
         # Handle image input
         images = []
+        temp_image_path = None  # Initialize to None
         if input_image is not None:
             # Save uploaded image temporarily
+            temp_image_path = output_dir / f"temp_input_{current_seed}.jpg"
             if hasattr(input_image, 'save'):
                 input_image.save(temp_image_path)
             else:
                 # If it's a file path already
+                temp_image_path = Path(input_image)
             # Format: (image_path, frame_idx, strength)
             images = [(str(temp_image_path), 0, 1.0)]
+        # Get embeddings from text encoder space
+        print(f"Encoding prompt: {prompt}")
+        if text_encoder_client is None:
+            raise RuntimeError(
+                f"Text encoder client not connected. Please ensure the text encoder space "
+                f"({TEXT_ENCODER_SPACE}) is running and accessible."
+            )
+        try:
+            # Prepare image for upload if it exists
+            image_input = None
+            if temp_image_path is not None:
+                image_input = handle_file(str(temp_image_path))
+            result = text_encoder_client.predict(
+                prompt=prompt,
+                enhance_prompt=True,
+                input_image=image_input,
+                seed=current_seed,
+                api_name="/encode_prompt"
+            )
+            embedding_path = result[0]  # Path to .pt file
+            print(f"Embeddings received from: {embedding_path}")
+            # Load embeddings
+            embeddings = torch.load(embedding_path)
+            video_context = embeddings['video_context']
+            audio_context = embeddings['audio_context']
+            print("✓ Embeddings loaded successfully")
+        except Exception as e:
+            raise RuntimeError(
+                f"Failed to get embeddings from text encoder space: {e}\n"
+                f"Please ensure {TEXT_ENCODER_SPACE} is running properly."
+            )
         # Run inference - progress automatically tracks tqdm from pipeline
         pipeline(
             frame_rate=frame_rate,
             images=images,
             tiling_config=TilingConfig.default(),
+            video_context=video_context,
+            audio_context=audio_context,
         )
         return str(output_path), current_seed
 .gradio-container .contain{max-width: 1200px !important; margin: 0 auto !important}
 '''
 if __name__ == "__main__":
+    demo.launch(theme=gr.themes.Citrus(), css=css, share=True)