Spaces:

TSXu
/

UniCalli_Dev

Running on Zero

TSXu committed on Jan 29

Commit

46eccdb

1 Parent(s): 8d5a72c

Refactor AOT compilation to follow FLUX-Kontext-fp8 pattern exactly

- Use tree_map_only for static dynamic_shapes
- Quantize before export (FLUX-Kontext-fp8 order)
- Use torch.export.export instead of draft_export
- Add comprehensive logging to file

Files changed (2) hide show

.gitignore +1 -0
app.py +113 -56

.gitignore CHANGED Viewed

@@ -13,3 +13,4 @@ build/
 *.pth
 *.ckpt
 *.safetensors

 *.pth
 *.ckpt
 *.safetensors
+*.log

app.py CHANGED Viewed

@@ -7,8 +7,40 @@ With Float8 quantization and AOT compilation for faster inference
 # Install compatible torch 2.8 + torchvision 0.23 + torchao + spaces (for AOT compilation)
 # spaces.aoti_capture requires PyTorch 2.8+
 import os
 os.system('pip install --upgrade --pre --extra-index-url https://download.pytorch.org/whl/nightly/cu126 "torch>=2.8,<2.9" "torchvision>=0.23,<0.24" torchao spaces')
-print("torch 2.8 + torchvision 0.23 + torchao + spaces installation complete!")
 # IMPORTANT: import spaces first before any CUDA-related packages
 import spaces
@@ -191,63 +223,88 @@ def compile_model_first_time():
     """
     global _is_optimized, generator
-    print("="*50)
-    print("First-time run: Loading model and AOT compiling...")
-    print("="*50)
-    # Load model
-    gen = init_generator()
-    model = gen.model
-    # ========== AOT Compilation (FLUX-Kontext-fp8 pattern) ==========
-    # Step 1: Capture model forward during a real inference
-    print("Step 1: Capturing model forward pass with spaces.aoti_capture...")
-    with spaces.aoti_capture(model) as call:
-        gen.generate(
-            text="测试",
-            font_style="楷",
-            author=None,
-            num_steps=1,
-            seed=42,
         )
-    print("✓ Forward pass captured!")
-    # Step 2: Build dynamic shapes (None = fixed shapes)
-    print("Step 2: Building dynamic shapes...")
-    dynamic_shapes = tree_map_only((torch.Tensor, bool), lambda t: None, call.kwargs)
-    print("✓ Dynamic shapes built!")
-    # Step 3: Apply Float8 quantization
-    print("Step 3: Applying Float8 quantization...")
-    quantize_(model, Float8DynamicActivationFloat8WeightConfig())
-    print("✓ Float8 quantization complete!")
-    # Step 4: Export model with torch.export
-    print("Step 4: Exporting model with torch.export...")
-    exported = torch.export.export(
-        mod=model,
-        args=call.args,
-        kwargs=call.kwargs,
-        dynamic_shapes=dynamic_shapes,
-    )
-    print("✓ Model exported!")
-    # Step 5: AOT compile with spaces.aoti_compile
-    print("Step 5: AOT compiling with spaces.aoti_compile...")
-    print(f"  Inductor configs: {INDUCTOR_CONFIGS}")
-    compiled = spaces.aoti_compile(exported, INDUCTOR_CONFIGS)
-    print("✓ AOT compilation complete!")
-    # Step 6: Apply compiled model
-    print("Step 6: Applying compiled model...")
-    spaces.aoti_apply(compiled, model)
-    print("✓ AOT compiled model applied!")
-    _is_optimized = True
-    print("="*50)
-    print("✓ Model loaded and AOT compiled!")
-    print("="*50)
     return gen

 # Install compatible torch 2.8 + torchvision 0.23 + torchao + spaces (for AOT compilation)
 # spaces.aoti_capture requires PyTorch 2.8+
 import os
+import sys
+import logging
+import traceback
+from datetime import datetime
+# Set up logging: every record is sent to both LOG_FILE and stdout
+LOG_FILE = "aot_compile.log"
+logging.basicConfig(
+    level=logging.DEBUG,
+    format='%(asctime)s [%(levelname)s] %(message)s',
+    handlers=[
+        logging.FileHandler(LOG_FILE, mode='w', encoding='utf-8'),  # mode='w' starts a fresh log file on every run
+        logging.StreamHandler(sys.stdout)  # bound to whatever object sys.stdout refers to at this point
+    ]
+)
+logger = logging.getLogger(__name__)
+# Also redirect print to log
+class LoggingPrinter:  # file-like wrapper: logs each non-blank write at INFO level, then forwards the text to the wrapped stream
+    def __init__(self, logger, original_stdout):  # logger: receives the stripped text; original_stdout: stream every write is forwarded to
+        self.logger = logger
+        self.original_stdout = original_stdout
+    def write(self, message):  # NOTE(review): no instance is installed as sys.stdout in the lines visible here - confirm it is actually used
+        if message.strip():  # skip whitespace-only writes so the log gets no empty records
+            self.logger.info(message.strip())  # NOTE(review): the logger also has a stdout StreamHandler, so console output may appear twice - verify
+        self.original_stdout.write(message)  # always forward the unmodified text, including whitespace-only writes
+    def flush(self):  # delegates to the wrapped stream; this class keeps no buffer of its own
+        self.original_stdout.flush()
+# Keep original stdout for gradio
+_original_stdout = sys.stdout  # NOTE(review): not referenced anywhere in the lines visible here
 os.system('pip install --upgrade --pre --extra-index-url https://download.pytorch.org/whl/nightly/cu126 "torch>=2.8,<2.9" "torchvision>=0.23,<0.24" torchao spaces')
+logger.info("torch 2.8 + torchvision 0.23 + torchao + spaces installation complete!")  # NOTE(review): logged unconditionally - the exit status of os.system above is never checked
 # IMPORTANT: import spaces first before any CUDA-related packages
 import spaces
     """
     global _is_optimized, generator
+    logger.info("="*50)
+    logger.info("First-time run: Loading model and AOT compiling...")
+    logger.info("="*50)
+    try:
+        # Load model
+        gen = init_generator()
+        model = gen.model
+        # ========== AOT Compilation (FLUX-Kontext-fp8 pattern exactly) ==========
+        # Step 1: Capture model forward during a real inference
+        logger.info("Step 1: Capturing model forward pass with spaces.aoti_capture...")
+        with spaces.aoti_capture(model) as call:
+            gen.generate(
+                text="测试",
+                font_style="楷",
+                author=None,
+                num_steps=1,
+                seed=42,
+            )
+        logger.info("✓ Forward pass captured!")
+        # Log call info
+        logger.info(f"  call.args types: {[type(a).__name__ for a in call.args]}")
+        logger.info(f"  call.kwargs keys: {list(call.kwargs.keys())}")
+        for k, v in call.kwargs.items():
+            if hasattr(v, 'shape'):
+                logger.info(f"    {k}: tensor shape={v.shape}, dtype={v.dtype}")
+            else:
+                logger.info(f"    {k}: {type(v).__name__} = {v}")
+        # Step 2: Build dynamic_shapes (FLUX-Kontext-fp8 pattern: all static)
+        # tree_map_only maps all tensors/bools to None = static shape
+        logger.info("Step 2: Building static shapes (FLUX-Kontext-fp8 pattern)...")
+        dynamic_shapes = tree_map_only((torch.Tensor, bool), lambda t: None, call.kwargs)
+        logger.info(f"  dynamic_shapes keys: {list(dynamic_shapes.keys()) if dynamic_shapes else 'None'}")
+        logger.info("✓ Static shapes configured!")
+        # Step 3: Apply Float8 quantization BEFORE export (FLUX-Kontext-fp8 pattern)
+        logger.info("Step 3: Applying Float8 quantization...")
+        quantize_(model, Float8DynamicActivationFloat8WeightConfig())
+        logger.info("✓ Float8 quantization complete!")
+        # Step 4: Export model with torch.export.export (not draft_export)
+        logger.info("Step 4: Exporting model with torch.export.export...")
+        exported = torch.export.export(
+            mod=model,
+            args=call.args,
+            kwargs=call.kwargs,
+            dynamic_shapes=dynamic_shapes,
         )
+        logger.info("✓ Model exported!")
+        # Step 5: AOT compile with spaces.aoti_compile
+        logger.info("Step 5: AOT compiling with spaces.aoti_compile...")
+        logger.info(f"  Inductor configs: {INDUCTOR_CONFIGS}")
+        compiled = spaces.aoti_compile(exported, INDUCTOR_CONFIGS)
+        logger.info("✓ AOT compilation complete!")
+        # Step 6: Apply compiled model
+        logger.info("Step 6: Applying compiled model...")
+        spaces.aoti_apply(compiled, model)
+        logger.info("✓ AOT compiled model applied!")
+        _is_optimized = True
+        logger.info("="*50)
+    except Exception as e:
+        logger.error("="*50)
+        logger.error("AOT COMPILATION FAILED!")
+        logger.error("="*50)
+        logger.error(f"Exception: {e}")
+        logger.error("Full traceback:")
+        logger.error(traceback.format_exc())
+        # Save full error to file
+        with open("aot_error.log", "w") as f:
+            f.write(f"Exception: {e}\n\n")
+            f.write(traceback.format_exc())
+        raise
+    logger.info("✓ Model loaded and AOT compiled!")
+    logger.info("="*50)
     return gen