Spaces:

TSXu
/

UniCalli_Dev

Running on Zero

TSXu committed on Jan 29

Commit

f1e9349

1 Parent(s): 7c94f94

Fix nested @spaces.GPU and torchao version compatibility

1. Remove nested @spaces.GPU - all AOT compilation is now in a single function
2. Install a nightly torchao build compatible with torch 2.10.0 at startup
3. All CUDA operations now run in @spaces.GPU-decorated functions:
- compile_model_first_time(): 300s duration - AOT compilation
- run_generation(): 120s duration - normal generation

Files changed (1) hide show

app.py +58 -67

app.py CHANGED Viewed

@@ -4,6 +4,17 @@ Gradio Demo for Chinese Calligraphy Generation - HuggingFace Space Version
 With Float8 quantization and AOT compilation for faster inference
 """
 # IMPORTANT: import spaces first before any CUDA-related packages
 import spaces
@@ -12,7 +23,6 @@ import json
 import csv
 import time
 import torch
-import os
 # Load author and font mappings from CSV
 def load_author_fonts_from_csv(csv_path):
@@ -178,85 +188,66 @@ def parse_font_style(font_style: str) -> str:
     return None
-def aot_compile_transformer(gen):
-    """
-    AOT compile the transformer using spaces.aoti_capture/compile/apply.
-    Exactly following FLUX-Kontext-fp8 pattern.
-    """
-    model = gen.model
-    @spaces.GPU(duration=300)  # 5 minutes for AOT compilation
-    def compile_transformer():
-        print("="*50)
-        print("Starting AOT compilation (FLUX-Kontext-fp8 pattern)...")
-        print("="*50)
-        # Step 1: Capture model forward during a real inference
-        print("Step 1: Capturing model forward pass with spaces.aoti_capture...")
-        with spaces.aoti_capture(model) as call:
-            # Run a sample generation to capture the forward call
-            gen.generate(
-                text="测试",
-                font_style="楷",
-                author=None,
-                num_steps=1,
-                seed=42,
-            )
-        print("✓ Forward pass captured!")
-        # Step 2: Build dynamic shapes (None = fixed shapes)
-        print("Step 2: Building dynamic shapes...")
-        dynamic_shapes = tree_map_only((torch.Tensor, bool), lambda t: None, call.kwargs)
-        print("✓ Dynamic shapes built!")
-        # Step 3: Apply Float8 quantization
-        print("Step 3: Applying Float8 quantization...")
-        quantize_(model, Float8DynamicActivationFloat8WeightConfig())
-        print("✓ Float8 quantization complete!")
-        # Step 4: Export model with torch.export
-        print("Step 4: Exporting model with torch.export...")
-        exported = torch.export.export(
-            mod=model,
-            args=call.args,
-            kwargs=call.kwargs,
-            dynamic_shapes=dynamic_shapes,
-        )
-        print("✓ Model exported!")
-        # Step 5: AOT compile with spaces.aoti_compile
-        print("Step 5: AOT compiling with spaces.aoti_compile...")
-        print(f"  Inductor configs: {INDUCTOR_CONFIGS}")
-        compiled = spaces.aoti_compile(exported, INDUCTOR_CONFIGS)
-        print("✓ AOT compilation complete!")
-        return compiled
-    # Run compilation and apply the result
-    print("Running AOT compilation...")
-    spaces.aoti_apply(compile_transformer(), model)
-    print("="*50)
-    print("✓ AOT compiled model applied!")
-    print("="*50)
 @spaces.GPU(duration=300)  # 5 minutes for first-time AOT compilation
 def compile_model_first_time():
     """
     First-time: Load model and run AOT compilation.
-    Returns the optimized generator.
     """
     global _is_optimized, generator
     print("="*50)
-    print("First-time run: Loading model and running AOT compilation...")
     print("="*50)
     # Load model
     gen = init_generator()
-    # AOT compile the transformer
-    aot_compile_transformer(gen)
     _is_optimized = True
     print("="*50)

 With Float8 quantization and AOT compilation for faster inference
 """
+# Install compatible torchao version for the current torch (following FLUX-Kontext-fp8 pattern)
+import os
+import subprocess
+print("Installing compatible torchao version...")
+subprocess.run([
+    "pip", "install", "--upgrade", "--pre",
+    "--extra-index-url", "https://download.pytorch.org/whl/nightly/cu126",
+    "torchao"
+], capture_output=True)
+print("torchao installation complete!")
 # IMPORTANT: import spaces first before any CUDA-related packages
 import spaces
 import csv
 import time
 import torch
 # Load author and font mappings from CSV
 def load_author_fonts_from_csv(csv_path):
     return None
 @spaces.GPU(duration=300)  # 5 minutes for first-time AOT compilation
 def compile_model_first_time():
     """
     First-time: Load model and run AOT compilation.
+    Exactly following FLUX-Kontext-fp8 pattern.
     """
     global _is_optimized, generator
     print("="*50)
+    print("First-time run: Loading model and AOT compiling...")
     print("="*50)
     # Load model
     gen = init_generator()
+    model = gen.model
+    # ========== AOT Compilation (FLUX-Kontext-fp8 pattern) ==========
+    # Step 1: Capture model forward during a real inference
+    print("Step 1: Capturing model forward pass with spaces.aoti_capture...")
+    with spaces.aoti_capture(model) as call:
+        gen.generate(
+            text="测试",
+            font_style="楷",
+            author=None,
+            num_steps=1,
+            seed=42,
+        )
+    print("✓ Forward pass captured!")
+    # Step 2: Build dynamic shapes (None = fixed shapes)
+    print("Step 2: Building dynamic shapes...")
+    dynamic_shapes = tree_map_only((torch.Tensor, bool), lambda t: None, call.kwargs)
+    print("✓ Dynamic shapes built!")
+    # Step 3: Apply Float8 quantization
+    print("Step 3: Applying Float8 quantization...")
+    quantize_(model, Float8DynamicActivationFloat8WeightConfig())
+    print("✓ Float8 quantization complete!")
+    # Step 4: Export model with torch.export
+    print("Step 4: Exporting model with torch.export...")
+    exported = torch.export.export(
+        mod=model,
+        args=call.args,
+        kwargs=call.kwargs,
+        dynamic_shapes=dynamic_shapes,
+    )
+    print("✓ Model exported!")
+    # Step 5: AOT compile with spaces.aoti_compile
+    print("Step 5: AOT compiling with spaces.aoti_compile...")
+    print(f"  Inductor configs: {INDUCTOR_CONFIGS}")
+    compiled = spaces.aoti_compile(exported, INDUCTOR_CONFIGS)
+    print("✓ AOT compilation complete!")
+    # Step 6: Apply compiled model
+    print("Step 6: Applying compiled model...")
+    spaces.aoti_apply(compiled, model)
+    print("✓ AOT compiled model applied!")
     _is_optimized = True
     print("="*50)