TSXu committed on
Commit
d3ccd4b
·
1 Parent(s): c6a1e05

Add batch generation, torch.compile acceleration, fix dtype issues

Browse files

- Add batch generation option (1-4 images) with gallery output
- Enable torch.compile() for inference acceleration
- Fix bfloat16/float32 dtype mismatches in pipeline
- Change default inference steps to 25
- Update examples with batch_size parameter

Files changed (4) hide show
  1. app.py +54 -27
  2. inference.py +8 -0
  3. src/flux/sampling.py +4 -4
  4. src/flux/xflux_pipeline.py +4 -4
app.py CHANGED
@@ -75,7 +75,7 @@ def init_generator():
75
  font_descriptions_path='dataset/chirography.json',
76
  author_descriptions_path='dataset/calligraphy_styles_en.json',
77
  use_deepspeed=False,
78
- use_4bit_quantization=False, # Disable 4-bit quantization for faster init
79
  )
80
  return generator
81
 
@@ -110,6 +110,7 @@ def generate_calligraphy(
110
  num_steps: int,
111
  seed: int,
112
  random_seed: bool,
 
113
  ):
114
  """
115
  Generate calligraphy based on user inputs
@@ -121,10 +122,13 @@ def generate_calligraphy(
121
  num_steps: Number of denoising steps
122
  seed: Random seed
123
  random_seed: Whether to use random seed
 
124
 
125
  Returns:
126
- Generated image and condition image
127
  """
 
 
128
  # Validate text - must be 1-7 characters
129
  if len(text) < 1:
130
  raise gr.Error("文本不能为空 / Text cannot be empty")
@@ -146,22 +150,34 @@ def generate_calligraphy(
146
 
147
  # Handle seed
148
  if random_seed:
149
- import torch
150
  seed = torch.randint(0, 2**32, (1,)).item()
151
 
152
  # Initialize generator if needed
153
  gen = init_generator()
154
 
155
- # Generate
156
- result_img, cond_img = gen.generate(
157
- text=text,
158
- font_style=font,
159
- author=author,
160
- num_steps=num_steps,
161
- seed=seed,
162
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
163
 
164
- return result_img, f"Seed: {seed}"
165
 
166
 
167
  # Create Gradio interface
@@ -215,7 +231,7 @@ with gr.Blocks(title="UniCalli - Chinese Calligraphy Generator / 中国书法生
215
  label="生成步数 / Inference Steps",
216
  minimum=10,
217
  maximum=50,
218
- value=39,
219
  step=1,
220
  info="更多步数 = 更高质量,但更慢 / More steps = higher quality, but slower"
221
  )
@@ -231,6 +247,15 @@ with gr.Blocks(title="UniCalli - Chinese Calligraphy Generator / 中国书法生
231
  value=False
232
  )
233
 
 
 
 
 
 
 
 
 
 
234
  generate_btn = gr.Button("🎨 生成书法 / Generate Calligraphy", variant="primary", size="lg")
235
 
236
  with gr.Column(scale=1):
@@ -238,15 +263,15 @@ with gr.Blocks(title="UniCalli - Chinese Calligraphy Generator / 中国书法生
238
  gr.Markdown("### 🖼️ 生成结果 / Generated Result")
239
  gr.Markdown("") # Add spacing
240
 
241
- with gr.Row():
242
- gr.Column(scale=1) # Left spacer
243
- with gr.Column(scale=2):
244
- output_image = gr.Image(
245
- show_label=False,
246
- type="pil",
247
- height=600
248
- )
249
- gr.Column(scale=1) # Right spacer
250
 
251
  seed_info = gr.Textbox(
252
  label="种子信息 / Seed Info",
@@ -283,18 +308,19 @@ with gr.Blocks(title="UniCalli - Chinese Calligraphy Generator / 中国书法生
283
  num_steps,
284
  seed,
285
  random_seed,
 
286
  ],
287
- outputs=[output_image, seed_info]
288
  )
289
 
290
  # Examples
291
  gr.Markdown("### 📋 示例 / Examples")
292
  gr.Examples(
293
  examples=[
294
- ["春风得意马蹄疾", "赵佶\\宋徽宗", "楷 (Regular Script)", 39, 42, False],
295
- ["海内存知己", "黄庭坚", "行 (Running Script)", 39, 42, False],
296
- ["天道酬勤", "王羲之", "草 (Cursive Script)", 39, 42, False],
297
- ["宁静致远", "None (Synthetic / 合成风格)", "楷 (Regular Script)", 39, 42, False],
298
  ],
299
  inputs=[
300
  text_input,
@@ -303,6 +329,7 @@ with gr.Blocks(title="UniCalli - Chinese Calligraphy Generator / 中国书法生
303
  num_steps,
304
  seed,
305
  random_seed,
 
306
  ],
307
  )
308
 
 
75
  font_descriptions_path='dataset/chirography.json',
76
  author_descriptions_path='dataset/calligraphy_styles_en.json',
77
  use_deepspeed=False,
78
+ use_4bit_quantization=False, # Disabled - quantization overhead not worth it
79
  )
80
  return generator
81
 
 
110
  num_steps: int,
111
  seed: int,
112
  random_seed: bool,
113
+ batch_size: int = 1,
114
  ):
115
  """
116
  Generate calligraphy based on user inputs
 
122
  num_steps: Number of denoising steps
123
  seed: Random seed
124
  random_seed: Whether to use random seed
125
+ batch_size: Number of images to generate
126
 
127
  Returns:
128
+ Generated images (gallery) and seed info
129
  """
130
+ import torch
131
+
132
  # Validate text - must be 1-7 characters
133
  if len(text) < 1:
134
  raise gr.Error("文本不能为空 / Text cannot be empty")
 
150
 
151
  # Handle seed
152
  if random_seed:
 
153
  seed = torch.randint(0, 2**32, (1,)).item()
154
 
155
  # Initialize generator if needed
156
  gen = init_generator()
157
 
158
+ # Generate batch of images
159
+ results = []
160
+ seeds_used = []
161
+
162
+ for i in range(batch_size):
163
+ current_seed = seed + i # Increment seed for each image in batch
164
+ result_img, cond_img = gen.generate(
165
+ text=text,
166
+ font_style=font,
167
+ author=author,
168
+ num_steps=num_steps,
169
+ seed=current_seed,
170
+ )
171
+ results.append(result_img)
172
+ seeds_used.append(current_seed)
173
+
174
+ # Format seed info
175
+ if batch_size == 1:
176
+ seed_info = f"Seed: {seeds_used[0]}"
177
+ else:
178
+ seed_info = f"Seeds: {seeds_used[0]} - {seeds_used[-1]} ({batch_size} images)"
179
 
180
+ return results, seed_info
181
 
182
 
183
  # Create Gradio interface
 
231
  label="生成步数 / Inference Steps",
232
  minimum=10,
233
  maximum=50,
234
+ value=25,
235
  step=1,
236
  info="更多步数 = 更高质量,但更慢 / More steps = higher quality, but slower"
237
  )
 
247
  value=False
248
  )
249
 
250
+ batch_size = gr.Slider(
251
+ label="批量生成数量 / Batch Size",
252
+ minimum=1,
253
+ maximum=4,
254
+ value=1,
255
+ step=1,
256
+ info="生成多张图片以选择最佳效果 / Generate multiple images to pick the best"
257
+ )
258
+
259
  generate_btn = gr.Button("🎨 生成书法 / Generate Calligraphy", variant="primary", size="lg")
260
 
261
  with gr.Column(scale=1):
 
263
  gr.Markdown("### 🖼️ 生成结果 / Generated Result")
264
  gr.Markdown("") # Add spacing
265
 
266
+ output_gallery = gr.Gallery(
267
+ label="生成结果 / Generated Results",
268
+ show_label=False,
269
+ columns=2,
270
+ rows=2,
271
+ height=650,
272
+ object_fit="contain",
273
+ allow_preview=True
274
+ )
275
 
276
  seed_info = gr.Textbox(
277
  label="种子信息 / Seed Info",
 
308
  num_steps,
309
  seed,
310
  random_seed,
311
+ batch_size,
312
  ],
313
+ outputs=[output_gallery, seed_info]
314
  )
315
 
316
  # Examples
317
  gr.Markdown("### 📋 示例 / Examples")
318
  gr.Examples(
319
  examples=[
320
+ ["春风得意马蹄疾", "赵佶\\宋徽宗", "楷 (Regular Script)", 25, 42, False, 1],
321
+ ["海内存知己", "黄庭坚", "行 (Running Script)", 25, 42, False, 1],
322
+ ["天道酬勤", "王羲之", "草 (Cursive Script)", 25, 42, False, 1],
323
+ ["宁静致远", "None (Synthetic / 合成风格)", "楷 (Regular Script)", 25, 42, False, 1],
324
  ],
325
  inputs=[
326
  text_input,
 
329
  num_steps,
330
  seed,
331
  random_seed,
332
+ batch_size,
333
  ],
334
  )
335
 
inference.py CHANGED
@@ -338,6 +338,14 @@ class CalligraphyGenerator:
338
  if not use_deepspeed:
339
  print(f"Moving model to {self.device}...")
340
  model = model.to(self.device)
 
 
 
 
 
 
 
 
341
 
342
  return model
343
 
 
338
  if not use_deepspeed:
339
  print(f"Moving model to {self.device}...")
340
  model = model.to(self.device)
341
+
342
+ # Apply torch.compile for faster inference (PyTorch 2.0+)
343
+ try:
344
+ print("Applying torch.compile() for acceleration...")
345
+ model = torch.compile(model, mode="reduce-overhead")
346
+ print("torch.compile() applied successfully!")
347
+ except Exception as e:
348
+ print(f"torch.compile() not available or failed: {e}")
349
 
350
  return model
351
 
src/flux/sampling.py CHANGED
@@ -61,10 +61,10 @@ def prepare(t5: HFEmbedder, clip: HFEmbedder, img: Tensor, prompt: str | list[st
61
 
62
  return {
63
  "img": img,
64
- "img_ids": img_ids.to(img.device),
65
- "txt": txt.to(img.device, dtype=img_dtype),
66
- "txt_ids": txt_ids.to(img.device),
67
- "vec": vec.to(img.device, dtype=img_dtype),
68
  }
69
 
70
 
 
61
 
62
  return {
63
  "img": img,
64
+ "img_ids": img_ids.to(device=img.device, dtype=img_dtype),
65
+ "txt": txt.to(device=img.device, dtype=img_dtype),
66
+ "txt_ids": txt_ids.to(device=img.device, dtype=img_dtype),
67
+ "vec": vec.to(device=img.device, dtype=img_dtype),
68
  }
69
 
70
 
src/flux/xflux_pipeline.py CHANGED
@@ -195,13 +195,13 @@ class XFluxPipeline:
195
  padding="max_length",
196
  max_length=required_chars
197
  )["input_ids"]
198
- cond_txt_latent = self.embed_tokens(cond_text_token).to(self.device, torch.bfloat16)
199
 
200
  if not is_generation:
201
  cond_txt_latent = torch.rand(
202
  cond_txt_latent.size(),
203
  device=self.device,
204
- dtype=torch.bfloat16,
205
  generator=torch.Generator(device=self.device).manual_seed(seed)
206
  )
207
 
@@ -226,7 +226,7 @@ class XFluxPipeline:
226
  controlnet_image = self.annotator(controlnet_image, width, height)
227
  controlnet_image = torch.from_numpy((np.array(controlnet_image) / 127.5) - 1)
228
  controlnet_image = controlnet_image.permute(
229
- 2, 0, 1).unsqueeze(0).to(torch.bfloat16).to(self.device)
230
 
231
  return self.forward(
232
  prompt,
@@ -313,7 +313,7 @@ class XFluxPipeline:
313
  ):
314
  x = get_noise(
315
  1, height, width, device=self.device,
316
- dtype=torch.bfloat16, seed=seed
317
  )
318
 
319
  timesteps = get_schedule(
 
195
  padding="max_length",
196
  max_length=required_chars
197
  )["input_ids"]
198
+ cond_txt_latent = self.embed_tokens(cond_text_token).to(self.device, torch.float32)
199
 
200
  if not is_generation:
201
  cond_txt_latent = torch.rand(
202
  cond_txt_latent.size(),
203
  device=self.device,
204
+ dtype=torch.float32,
205
  generator=torch.Generator(device=self.device).manual_seed(seed)
206
  )
207
 
 
226
  controlnet_image = self.annotator(controlnet_image, width, height)
227
  controlnet_image = torch.from_numpy((np.array(controlnet_image) / 127.5) - 1)
228
  controlnet_image = controlnet_image.permute(
229
+ 2, 0, 1).unsqueeze(0).to(torch.float32).to(self.device)
230
 
231
  return self.forward(
232
  prompt,
 
313
  ):
314
  x = get_noise(
315
  1, height, width, device=self.device,
316
+ dtype=torch.float32, seed=seed
317
  )
318
 
319
  timesteps = get_schedule(