Spaces:

TSXu
/

UniCalli_Dev

Running on Zero

App Files Community

TSXu committed on Jan 28

Commit

3974489

1 Parent(s): 624511f

Remove AoT compilation from main (saved to feature/aot-compilation branch)

Browse files

Files changed (2) hide show

app.py +0 -201
requirements.txt +0 -3

app.py CHANGED Viewed

@@ -151,172 +151,6 @@ def init_generator():
     return generator
-# ============== AoT Compilation ==============
-COMPILED_MODEL_REPO = "TSXu/Unicalli_Pro"  # Where to save/load compiled model
-COMPILED_MODEL_FILENAME = "compiled_flux_h200.pt2"  # Compiled model filename (fp32)
-COMPILED_MODEL_FP8_FILENAME = "compiled_flux_h200_fp8.pt2"  # FP8 quantized version
-def check_compiled_model_exists():
-    """Check if compiled model exists on HuggingFace Hub"""
-    import os
-    from huggingface_hub import hf_hub_url, get_hf_file_metadata
-    try:
-        hf_token = os.environ.get("HF_TOKEN", None)
-        url = hf_hub_url(COMPILED_MODEL_REPO, COMPILED_MODEL_FILENAME)
-        metadata = get_hf_file_metadata(url, token=hf_token)
-        print(f"✓ Found compiled model on Hub: {COMPILED_MODEL_FILENAME} ({metadata.size / 1e9:.2f} GB)")
-        return True
-    except Exception as e:
-        print(f"Compiled model not found on Hub: {e}")
-        return False
-@spaces.GPU(duration=300)  # 5 minutes for compilation
-def compile_and_upload_model(use_fp8: bool = False):
-    """
-    Compile model with AoT and upload to HuggingFace Hub.
-    This only needs to be done once!
-    Args:
-        use_fp8: Whether to use FP8 quantization (faster, less memory, H200 only)
-    """
-    import torch
-    import os
-    from huggingface_hub import HfApi
-    global generator
-    # Initialize generator if not already done
-    if generator is None:
-        generator = init_generator()
-    model = generator.model
-    filename = COMPILED_MODEL_FP8_FILENAME if use_fp8 else COMPILED_MODEL_FILENAME
-    print("="*50)
-    print(f"Starting AoT Compilation {'with FP8' if use_fp8 else '(FP32)'}...")
-    print("This may take 5-10 minutes...")
-    print("="*50)
-    # Configure Inductor for optimal performance
-    torch._inductor.config.conv_1x1_as_mm = True
-    torch._inductor.config.coordinate_descent_tuning = True
-    try:
-        # Step 0: Apply FP8 quantization if requested
-        if use_fp8:
-            print("Step 0: Applying FP8 quantization...")
-            try:
-                from torchao.quantization import quantize_, Float8DynamicActivationFloat8WeightConfig
-                quantize_(model, Float8DynamicActivationFloat8WeightConfig())
-                print("  ✓ FP8 quantization applied!")
-            except Exception as e:
-                print(f"  ⚠️ FP8 quantization failed: {e}")
-                print("  Falling back to FP32...")
-                use_fp8 = False
-                filename = COMPILED_MODEL_FILENAME
-        # Step 1: Capture example inputs
-        print("Step 1/4: Capturing example inputs...")
-        with spaces.aoti_capture(model) as call:
-            # Run minimal inference to capture inputs
-            generator.generate(
-                text="新年快乐发大财",
-                font_style="楷",
-                author=None,
-                num_steps=1,
-                seed=42,
-            )
-        print(f"  Captured {len(call.args)} args, {len(call.kwargs)} kwargs")
-        # Step 2: Export model
-        print("Step 2/4: Exporting model graph...")
-        try:
-            exported = torch.export.export(
-                model,
-                args=call.args,
-                kwargs=call.kwargs,
-                strict=False,  # Allow non-strict tracing
-            )
-            print("  Export complete!")
-        except Exception as export_error:
-            print(f"  ❌ Export failed: {export_error}")
-            if use_fp8:
-                print("  FP8 may be incompatible with torch.export on this model.")
-                print("  Try unchecking FP8 and compiling with FP32 instead.")
-            return f"❌ Export failed: {export_error}\n\nTry disabling FP8 quantization."
-        # Step 3: Compile with AOTInductor
-        print("Step 3/4: Compiling with AOTInductor...")
-        compiled = spaces.aoti_compile(exported)
-        print("  Compilation complete!")
-        # Step 4: Upload to Hub
-        print("Step 4/4: Uploading to HuggingFace Hub...")
-        local_path = f"/tmp/{filename}"
-        torch.save(compiled, local_path)
-        hf_token = os.environ.get("HF_TOKEN", None)
-        api = HfApi()
-        api.upload_file(
-            path_or_fileobj=local_path,
-            path_in_repo=filename,
-            repo_id=COMPILED_MODEL_REPO,
-            repo_type="model",
-            token=hf_token,
-        )
-        print(f"✓ Uploaded compiled model to {COMPILED_MODEL_REPO}/{filename}")
-        # Apply compiled model
-        spaces.aoti_apply(compiled, model)
-        print("✓ Applied compiled model!")
-        mode = "FP8" if use_fp8 else "FP32"
-        return f"✅ Compilation ({mode}) and upload successful!"
-    except Exception as e:
-        import traceback
-        traceback.print_exc()
-        return f"❌ Compilation failed: {e}"
-@spaces.GPU(duration=60)  # 1 minute for loading
-def load_and_apply_compiled_model(use_fp8: bool = False):
-    """Load compiled model from Hub and apply to generator"""
-    import torch
-    import os
-    from huggingface_hub import hf_hub_download
-    global generator
-    if generator is None:
-        generator = init_generator()
-    filename = COMPILED_MODEL_FP8_FILENAME if use_fp8 else COMPILED_MODEL_FILENAME
-    try:
-        hf_token = os.environ.get("HF_TOKEN", None)
-        print(f"Downloading {'FP8' if use_fp8 else 'FP32'} compiled model...")
-        local_path = hf_hub_download(
-            repo_id=COMPILED_MODEL_REPO,
-            filename=filename,
-            token=hf_token
-        )
-        compiled = torch.load(local_path)
-        spaces.aoti_apply(compiled, generator.model)
-        mode = "FP8" if use_fp8 else "FP32"
-        print(f"✓ Applied pre-compiled model ({mode})!")
-        return f"✅ Loaded and applied {mode} compiled model!"
-    except Exception as e:
-        print(f"Failed to load compiled model: {e}")
-        return f"❌ Failed to load compiled model: {e}"
 def update_font_choices(author: str):
     """
     Update available font choices based on selected author
@@ -514,41 +348,6 @@ with gr.Blocks(title="UniCalli - Chinese Calligraphy Generator / 中国书法生
                 interactive=False
             )
-    # Admin section for AoT compilation
-    with gr.Accordion("🔧 管理员工具 / Admin Tools (AoT Compilation)", open=False):
-        gr.Markdown("""
-        **AoT 编译** 可以将模型预编译以加速推理 (~1.5x)。
-        - **首次使用**: 点击"编译并上传"，等待 5-10 分钟
-        - **后续使用**: 点击"加载已编译模型"
-        **FP8 量化** (仅 H200): 更快的推理 + 更少显存，推荐在 ZeroGPU 上使用！
-        """)
-        fp8_checkbox = gr.Checkbox(
-            label="使用 FP8 量化 / Use FP8 Quantization (推荐/Recommended for H200)",
-            value=True,
-            info="FP8 提供更快推理和更低显存，仅在 H200 上支持"
-        )
-        with gr.Row():
-            compile_btn = gr.Button("🔨 编译并上传 / Compile & Upload", variant="secondary")
-            load_compiled_btn = gr.Button("📥 加载已编译模型 / Load Compiled", variant="secondary")
-        compile_status = gr.Textbox(label="编译状态 / Compilation Status", value="", interactive=False)
-        compile_btn.click(
-            fn=compile_and_upload_model,
-            inputs=[fp8_checkbox],
-            outputs=[compile_status]
-        )
-        load_compiled_btn.click(
-            fn=load_and_apply_compiled_model,
-            inputs=[fp8_checkbox],
-            outputs=[compile_status]
-        )
     # Author info section
     with gr.Accordion("📚 可用书法家列表 / Available Calligraphers（共 {} 位 / {} total）".format(len(AUTHOR_LIST), len(AUTHOR_LIST)), open=False):
         author_info_md = "| 书法家 / Calligrapher | 可用字体 / Available Fonts |\n|--------|----------|\n"

     return generator
 def update_font_choices(author: str):
     """
     Update available font choices based on selected author
                 interactive=False
             )
     # Author info section
     with gr.Accordion("📚 可用书法家列表 / Available Calligraphers（共 {} 位 / {} total）".format(len(AUTHOR_LIST), len(AUTHOR_LIST)), open=False):
         author_info_md = "| 书法家 / Calligrapher | 可用字体 / Available Fonts |\n|--------|----------|\n"

requirements.txt CHANGED Viewed

@@ -14,9 +14,6 @@ timm
 sentencepiece
 diffusers
-# AoT Compilation & Optimization
-torchao  # FP8 quantization for H200
-kernels  # Pre-built kernels including FlashAttention-3
 # Data processing
 datasets

 sentencepiece
 diffusers
 # Data processing
 datasets