Spaces:

OmniSVG
/

OmniSVG-3B

Running on Zero

App Files Files Community

OmniSVG committed on Dec 2, 2025

Commit

01e90e4

verified ·

1 Parent(s): 992bf0d

Update app.py

Browse files

Files changed (1) hide show

app.py +1284 -261

app.py CHANGED Viewed

@@ -9,28 +9,519 @@ import argparse
 import gc
 import yaml
 import glob
-import shutil
-from huggingface_hub import hf_hub_download, snapshot_download
 from decoder import SketchDecoder
 from transformers import AutoTokenizer, AutoProcessor
 from qwen_vl_utils import process_vision_info
 from tokenizer import SVGTokenizer
-import spaces
-# 读取配置
-with open('config.yaml', 'r') as f:
     config = yaml.safe_load(f)
-# 全局变量
 tokenizer = None
 processor = None
 sketch_decoder = None
 svg_tokenizer = None
-device = "cpu"
-# System prompt
-SYSTEM_PROMPT = "You are a multimodal SVG generation assistant capable of generating SVG code from both text descriptions and images."
 SUPPORTED_FORMATS = ['.png', '.jpg', '.jpeg', '.webp', '.bmp', '.gif']
 def parse_args():
     parser = argparse.ArgumentParser(description='SVG Generator Service')
@@ -38,314 +529,846 @@ def parse_args():
     parser.add_argument('--port', type=int, default=7860)
     parser.add_argument('--share', action='store_true')
     parser.add_argument('--debug', action='store_true')
     return parser.parse_args()
-def load_models():
-    """Load models safely (Lazy Loading with Model Construction)"""
-    global tokenizer, processor, sketch_decoder, svg_tokenizer, device
-    if sketch_decoder is not None:
-        return
-    print("🚀 Loading models inside GPU container...")
-    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    if tokenizer is None:
-        # 1. 准备本地模型目录
-        local_model_dir = "custom_model_build"
-        # 只有当目录里没有权重文件时才执行构建
-        if not os.path.exists(os.path.join(local_model_dir, "pytorch_model.bin")):
-            print("🛠️ Building custom model directory...")
-            os.makedirs(local_model_dir, exist_ok=True)
-            # (A) 下载 Qwen 的配置文件
-            print("Downloading Qwen configurations...")
-            snapshot_download(
-                repo_id="Qwen/Qwen2.5-VL-3B-Instruct",
-                local_dir=local_model_dir,
-                allow_patterns=["*.json", "*.txt", "*.py"], # 这会下载 index.json，下面我们会删掉它
-                ignore_patterns=["*.safetensors", "*.bin", "*.pt"]
-            )
-            # (B) 下载 OmniSVG 权重
-            print("Downloading OmniSVG weights...")
-            sketch_weight_path = hf_hub_download(repo_id="OmniSVG/OmniSVG", filename="pytorch_model.bin")
-            # (C) 处理并保存权重
-            print("Processing and saving weights...")
-            state_dict = torch.load(sketch_weight_path, map_location="cpu")
-            new_state_dict = {}
-            for key in list(state_dict.keys()):
-                if key.startswith("transformer."):
-                    new_key = key.replace("transformer.", "", 1)
-                    new_state_dict[new_key] = state_dict[key]
-                else:
-                    new_state_dict[key] = state_dict[key]
-            torch.save(new_state_dict, os.path.join(local_model_dir, "pytorch_model.bin"))
-            del state_dict, new_state_dict
-            gc.collect()
-            print("✅ Custom model directory built successfully.")
-        # [关键修复] 强制删除所有的 index.json 文件
-        # 即使之前的运行残留了这些文件，这里也会把它们清理掉，防止报错 FileNotFoundError
-        print("🧹 Cleaning up conflicting index files...")
-        for index_file in glob.glob(os.path.join(local_model_dir, "*.index.json")):
-            try:
-                os.remove(index_file)
-                print(f"   Removed: {index_file}")
-            except Exception as e:
-                print(f"   Failed to remove {index_file}: {e}")
-        # 2. 从本地目录加载模型
-        print("Initializing quantized model from local directory...")
-        sketch_decoder = SketchDecoder(model_path=local_model_dir)
-        sketch_decoder.eval()
-        # 3. 加载 Tokenizer
-        tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-VL-3B-Instruct", padding_side="left")
-        processor = AutoProcessor.from_pretrained("Qwen/Qwen2.5-VL-3B-Instruct", padding_side="left")
-        svg_tokenizer = SVGTokenizer('config.yaml')
-    print(f"✅ Models loaded successfully on {device}")
-def process_and_resize_image(image_input, target_size=(200, 200)):
-    if isinstance(image_input, str):
-        image = Image.open(image_input)
-    elif isinstance(image_input, Image.Image):
-        image = image_input
     else:
-        image = Image.fromarray(image_input)
-    image = image.resize(target_size, Image.Resampling.LANCZOS)
-    return image
-def get_example_images():
-    example_dir = "./examples"
-    example_images = []
-    if os.path.exists(example_dir):
-        for ext in SUPPORTED_FORMATS:
-            pattern = os.path.join(example_dir, f"*{ext}")
-            example_images.extend(glob.glob(pattern))
-        example_images.sort()
-    return example_images
-def process_text_to_svg(text_description):
-    messages = [{
-        "role": "system",
-        "content": SYSTEM_PROMPT
-    }, {
-        "role": "user",
-        "content": [
-            {"type": "text", "text": f"Task: text-to-svg\nDescription: {text_description}\nGenerate SVG code based on the above description."}
         ]
-    }]
-    text_input = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-    inputs = processor(text=[text_input], truncation=True, return_tensors="pt")
-    input_ids = inputs['input_ids'].to(device)
-    attention_mask = inputs['attention_mask'].to(device)
-    pixel_values = None
-    image_grid_thw = None
-    return input_ids, attention_mask, pixel_values, image_grid_thw
-def process_image_to_svg(image_path):
-    messages = [{
-        "role": "system",
-        "content": SYSTEM_PROMPT
-    }, {
-        "role": "user",
-        "content": [
-            {"type": "text", "text": f"Task: image-to-svg\nGenerate SVG code that accurately represents the following image."},
-            {"type": "image", "image": image_path},
-        ]
-    }]
-    text_input = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-    image_inputs, _ = process_vision_info(messages)
-    inputs = processor(
-        text=[text_input],
-        images=image_inputs,
-        truncation=True,
-        return_tensors="pt"
-    )
     input_ids = inputs['input_ids'].to(device)
     attention_mask = inputs['attention_mask'].to(device)
-    pixel_values = inputs['pixel_values'].to(device) if 'pixel_values' in inputs else None
-    image_grid_thw = inputs['image_grid_thw'].to(device) if 'image_grid_thw' in inputs else None
-    return input_ids, attention_mask, pixel_values, image_grid_thw
-def generate_svg(input_ids, attention_mask, pixel_values=None, image_grid_thw=None, task_type="image-to-svg"):
     try:
-        gc.collect()
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
-        print(f"Generating SVG for {task_type}...")
-        if task_type == "image-to-svg":
-            gen_config = dict(
-                do_sample=True,
-                temperature=0.1,
-                top_p=0.001,
-                top_k=1,
-                num_beams=5,
-                repetition_penalty=1.05,
-            )
-        else:
-            gen_config = dict(
-                do_sample=True,
-                temperature=0.8,
-                top_p=0.95,
-                top_k=50,
-                repetition_penalty=1.05,
-                early_stopping=True,
-            )
-        if torch.cuda.is_available():
-            torch.cuda.synchronize()
-        model_config = config['model']
-        max_length = model_config['max_length']
-        output_ids = torch.ones(1, max_length).long().to(device) * model_config['eos_token_id']
-        with torch.no_grad():
-            results = sketch_decoder.transformer.generate(
-                input_ids=input_ids,
-                attention_mask=attention_mask,
-                pixel_values=pixel_values,
-                image_grid_thw=image_grid_thw,
-                max_new_tokens=max_length-1,
-                num_return_sequences=1,
-                bos_token_id=model_config['bos_token_id'],
-                eos_token_id=model_config['eos_token_id'],
-                pad_token_id=model_config['pad_token_id'],
-                use_cache=True,
-                **gen_config
-            )
-            results = results[:, :max_length-1]
-            output_ids[:, :results.shape[1]] = results
-        generated_xy, generated_colors = svg_tokenizer.process_generated_tokens(output_ids)
-        svg_tensors = svg_tokenizer.raster_svg(generated_xy)
-        if not svg_tensors or not svg_tensors[0]:
-            return "Error: No valid SVG paths generated", None
-        print('Creating SVG...')
-        svg = svg_tokenizer.apply_colors_to_svg(svg_tensors[0], generated_colors)
-        svg_str = svg.to_str()
-        png_data = cairosvg.svg2png(bytestring=svg_str.encode('utf-8'))
-        png_image = Image.open(io.BytesIO(png_data))
-        return svg_str, png_image
     except Exception as e:
-        print(f"Generation error: {e}")
         import traceback
         traceback.print_exc()
-        return f"Error: {e}", None
 @spaces.GPU
-def gradio_image_to_svg(image):
-    load_models()
-    if image is None: return "Please upload an image", None
-    processed_image = process_and_resize_image(image)
     with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp_file:
-        processed_image.save(tmp_file.name, format='PNG')
         tmp_path = tmp_file.name
     try:
-        input_ids, attention_mask, pixel_values, image_grid_thw = process_image_to_svg(tmp_path)
-        svg_code, png_image = generate_svg(input_ids, attention_mask, pixel_values, image_grid_thw, "image-to-svg")
-        return svg_code, png_image
     finally:
-        if os.path.exists(tmp_path): os.unlink(tmp_path)
-@spaces.GPU
-def gradio_text_to_svg(text_description):
-    load_models()
-    if not text_description or text_description.strip() == "":
-        return "Please enter a description", None
-    input_ids, attention_mask, pixel_values, image_grid_thw = process_text_to_svg(text_description)
-    svg_code, png_image = generate_svg(input_ids, attention_mask, pixel_values, image_grid_thw, "text-to-svg")
-    return svg_code, png_image
 def create_interface():
     example_texts = [
-        "A yellow t-shirt with a heart design represents love and positivity.",
-        "A bright yellow emoji with a surprised expression and rosy cheeks hovers above a shadow.",
-        "A brown coffee cup on a white saucer is seen from a top-down perspective.",
-        "A cartoon firefighter in a red and yellow uniform represents safety and protection.",
-        "A cute bunny face with pink ears rosy cheeks and a playful red tongue conveys charm and cheerfulness.",
-        "A bearded man with orange hair and a mustache represents a hipster style portrait.",
-        "A colorful ice cream popsicle with a hint of chocolate at the bottom on a stick.",
-        "A light blue shopping bag features a white flower with a red center and scattered dots.",
-        "A yellow phone icon and orange arrow on a blue smartphone screen symbolize an incoming call.",
-        "A sad wilted flower with pink petals slumps over an orange cloud with a blue striped background.",
-        "A cartoon character with dark blue hair and a mustache wears a blue suit against a light blue circular background.",
-        "A blue bookmark icon with a white plus sign in the center.",
-        "A computer monitor displays a bar graph with yellow orange and green bars.",
-        "A blue and gray database icon is overlaid with a yellow star in the bottom right corner.",
-        "An orange thermometer with a circular base represents temperature measurement.",
-        "A green delivery truck icon with a checkmark symbolizing a completed delivery.",
-        "A blue and gray microphone icon symbolizes audio recording or voice input.",
-        "Cloud icon with an upward arrow symbolizes uploading or cloud storage.",
-        "A brown chocolate bar is depicted in four square segments with a shiny glossy finish.",
-        "A colorful moving truck icon with a red and orange cargo container.",
-        "A light blue T-shirt icon is outlined with a bold blue border.",
-        "A person in a blue shirt and dark pants stands with one hand in a pocket gesturing outward.",
     ]
     example_images = get_example_images()
-    with gr.Blocks(title="OmniSVG Demo Page") as demo:
-        gr.Markdown("# OmniSVG Demo Page")
-        gr.Markdown("Generate SVG code from images or text descriptions")
         with gr.Tabs():
-            with gr.TabItem("Image-to-SVG"):
-                with gr.Row():
-                    with gr.Column():
-                        image_input = gr.Image(label="Input Image", type="pil", image_mode="RGBA")
                         if example_images:
-                            gr.Examples(examples=example_images, inputs=[image_input], label="Example Images", examples_per_page=12)
-                        image_generate_btn = gr.Button("Generate SVG", variant="primary")
-                    with gr.Column():
-                        image_svg_output = gr.Textbox(label="Generated SVG Code", lines=10, max_lines=20)
-                        image_png_preview = gr.Image(label="SVG Preview", type="pil")
-                image_generate_btn.click(fn=gradio_image_to_svg, inputs=[image_input], outputs=[image_svg_output, image_png_preview], queue=True)
-            with gr.TabItem("Text-to-SVG"):
-                with gr.Row():
-                    with gr.Column():
-                        text_input = gr.Textbox(label="Description", placeholder="Enter SVG description...", lines=3)
-                        gr.Examples(examples=[[text] for text in example_texts], inputs=[text_input], label="Example Descriptions", examples_per_page=10)
-                        text_generate_btn = gr.Button("Generate SVG", variant="primary")
-                    with gr.Column():
-                        text_svg_output = gr.Textbox(label="Generated SVG Code", lines=10, max_lines=20)
-                        text_png_preview = gr.Image(label="SVG Preview", type="pil")
-                text_generate_btn.click(fn=gradio_text_to_svg, inputs=[text_input], outputs=[text_svg_output, text_png_preview], queue=True)
-        gr.Markdown("""## Usage Instructions...""")
-        return demo
 if __name__ == "__main__":
     os.environ["TOKENIZERS_PARALLELISM"] = "false"
     args = parse_args()
-    print("Application starting... Models will be loaded on demand.")
     demo = create_interface()
-    demo.launch(server_name=args.listen, server_port=args.port, share=args.share, debug=args.debug)

 import gc
 import yaml
 import glob
+import numpy as np
+import time
+import threading
 from decoder import SketchDecoder
 from transformers import AutoTokenizer, AutoProcessor
 from qwen_vl_utils import process_vision_info
 from tokenizer import SVGTokenizer
+# Load config
+with open('./config.yaml', 'r') as f:
     config = yaml.safe_load(f)
+# Inference device: first CUDA GPU when one is visible, otherwise CPU.
+device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+# Model dtype: bfloat16 when the CUDA device supports it, float16 otherwise.
+# The is_available() guard matters because is_bf16_supported() queries the
+# current CUDA device and may raise at import time on CPU-only hosts with
+# older torch releases; the result is unchanged wherever CUDA is present.
+# NOTE(review): float16 is still the fallback on CPU, as before — confirm
+# that the model code tolerates half precision there.
+DTYPE = (
+    torch.bfloat16
+    if torch.cuda.is_available() and torch.cuda.is_bf16_supported()
+    else torch.float16
+)
+# Global Models
 tokenizer = None
 processor = None
 sketch_decoder = None
 svg_tokenizer = None
+# Thread lock for model inference
+generation_lock = threading.Lock()
+# Constants
+SYSTEM_PROMPT = """You are an expert SVG code generator.
+Generate precise, valid SVG path commands that accurately represent the described scene or object.
+Focus on capturing key shapes, spatial relationships, and visual composition."""
 SUPPORTED_FORMATS = ['.png', '.jpg', '.jpeg', '.webp', '.bmp', '.gif']
+TARGET_IMAGE_SIZE = 448
+BLACK_COLOR_TOKEN = 40012
+# Task configurations with defaults
+TASK_CONFIGS = {
+    "text-to-svg-icon": {
+        "default_temperature": 0.5,
+        "default_top_p": 0.88,
+        "default_top_k": 50,
+        "default_repetition_penalty": 1.05,
+    },
+    "text-to-svg-illustration": {
+        "default_temperature": 0.6,
+        "default_top_p": 0.90,
+        "default_top_k": 60,
+        "default_repetition_penalty": 1.03,
+    },
+    "image-to-svg": {
+        "default_temperature": 0.3,
+        "default_top_p": 0.90,
+        "default_top_k": 50,
+        "default_repetition_penalty": 1.05,
+    }
+}
+# Custom CSS
+CUSTOM_CSS = """
+/* Main container centering */
+.gradio-container {
+    max-width: 1400px !important;
+    margin: 0 auto !important;
+    padding: 20px !important;
+}
+/* Header styling */
+.header-container {
+    text-align: center;
+    margin-bottom: 20px;
+    padding: 20px;
+    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+    border-radius: 16px;
+    color: white;
+}
+.header-container h1 {
+    margin: 0;
+    font-size: 2.5em;
+    font-weight: 700;
+}
+.header-container p {
+    margin: 10px 0 0 0;
+    opacity: 0.9;
+    font-size: 1.1em;
+}
+/* Tips section */
+.tips-box {
+    background: #f8f9fa;
+    border-radius: 12px;
+    padding: 20px;
+    margin-bottom: 20px;
+    border: 1px solid #e0e0e0;
+}
+.tips-box h3 {
+    margin-top: 0;
+    color: #333;
+    border-bottom: 2px solid #667eea;
+    padding-bottom: 10px;
+}
+.tip-category {
+    background: white;
+    border-radius: 8px;
+    padding: 15px;
+    margin: 10px 0;
+    border-left: 4px solid #667eea;
+}
+.tip-category h4 {
+    margin: 0 0 10px 0;
+    color: #667eea;
+}
+.tip-category code {
+    background: #f0f0f0;
+    padding: 2px 6px;
+    border-radius: 4px;
+    font-size: 0.9em;
+}
+.example-prompt {
+    background: #e8f4fd;
+    padding: 10px;
+    border-radius: 6px;
+    margin: 8px 0;
+    font-style: italic;
+    font-size: 0.95em;
+    color: #333;
+}
+.red-tip {
+    color: #dc3545;
+    font-weight: 600;
+}
+.red-box {
+    background: #fff5f5;
+    border: 1px solid #ffcccc;
+    border-left: 4px solid #dc3545;
+    padding: 12px;
+    border-radius: 8px;
+    margin: 10px 0;
+}
+.red-box strong {
+    color: #dc3545;
+}
+.orange-box {
+    background: #fff8e6;
+    border: 1px solid #ffc107;
+    border-left: 4px solid #ff9800;
+    padding: 12px;
+    border-radius: 8px;
+    margin: 10px 0;
+}
+.orange-box strong {
+    color: #ff9800;
+}
+.green-box {
+    background: #e8f5e9;
+    border: 1px solid #81c784;
+    border-left: 4px solid #4caf50;
+    padding: 12px;
+    border-radius: 8px;
+    margin: 10px 0;
+}
+.green-box strong {
+    color: #4caf50;
+}
+/* Tab styling */
+.tabs {
+    border-radius: 12px !important;
+    overflow: hidden;
+}
+.tabitem {
+    padding: 20px !important;
+}
+/* Button styling */
+.primary-btn {
+    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
+    border: none !important;
+    font-weight: 600 !important;
+    padding: 12px 24px !important;
+    font-size: 1.1em !important;
+}
+.primary-btn:hover {
+    transform: translateY(-2px);
+    box-shadow: 0 4px 12px rgba(102, 126, 234, 0.4);
+}
+/* Settings group */
+.settings-group {
+    background: #f8f9fa;
+    border-radius: 10px;
+    padding: 15px;
+    margin: 10px 0;
+}
+.advanced-settings {
+    background: #f0f4f8;
+    border-radius: 8px;
+    padding: 12px;
+    margin-top: 10px;
+}
+/* Code output */
+.code-output textarea {
+    font-family: 'Monaco', 'Menlo', 'Ubuntu Mono', monospace !important;
+    font-size: 12px !important;
+    background: #1e1e1e !important;
+    color: #d4d4d4 !important;
+    border-radius: 8px !important;
+}
+/* Input image area */
+.input-image {
+    border: 2px dashed #ccc;
+    border-radius: 12px;
+    transition: border-color 0.3s;
+}
+.input-image:hover {
+    border-color: #667eea;
+}
+/* Footer */
+.footer {
+    text-align: center;
+    padding: 20px;
+    color: #666;
+    font-size: 0.9em;
+}
+/* Responsive adjustments */
+@media (max-width: 768px) {
+    .gradio-container {
+        padding: 10px !important;
+    }
+    .header-container h1 {
+        font-size: 1.8em;
+    }
+}
+"""
+# Enhanced Tips HTML - Bilingual with Red Tips
+TIPS_HTML = """
+<div class="tips-box">
+    <h3>💡 Prompting Guide & Best Practices | 提示词指南与最佳实践</h3>
+    <!-- Critical Red Tips Section -->
+    <div class="red-box">
+        <strong>🔴 CRITICAL: Tips That WILL Improve Your Results | 关键：一定能提升效果的技巧</strong>
+        <ul style="margin: 8px 0 0 0; padding-left: 20px;">
+            <li style="color: #dc3545; font-weight: 600;">
+                <strong>🎲 Generate 4-8 candidates and pick the best one!</strong> Results vary significantly between generations - this is NORMAL!<br/>
+                <span style="color: #666; font-weight: normal;">生成4-8个候选结果并选择最好的！每次生成结果差异很大 - 这是正常的！</span>
+            </li>
+            <li style="color: #dc3545; font-weight: 600;">
+                <strong>📐 Use GEOMETRIC descriptions:</strong> "triangular roof", "circular head", "rectangular body", "curved tail"<br/>
+                <span style="color: #666; font-weight: normal;">使用几何描述："三角形屋顶"、"圆形头部"、"矩形身体"、"弯曲尾巴"</span>
+            </li>
+            <li style="color: #dc3545; font-weight: 600;">
+                <strong>🎨 ALWAYS specify colors for EACH element:</strong> "black outline", "red roof", "blue shirt", "green grass"<br/>
+                <span style="color: #666; font-weight: normal;">始终为每个元素指定颜色："黑色轮廓"、"红色屋顶"、"蓝色衬衫"、"绿色草地"</span>
+            </li>
+            <li style="color: #dc3545; font-weight: 600;">
+                <strong>⬜ Say "white background" or "on white background"</strong> for cleaner results<br/>
+                <span style="color: #666; font-weight: normal;">说"白色背景"或"在白色背景上"可获得更干净的结果</span>
+            </li>
+            <li style="color: #dc3545; font-weight: 600;">
+                <strong>📍 Describe position & orientation:</strong> "centrally positioned", "pointing upward", "facing right", "at the bottom"<br/>
+                <span style="color: #666; font-weight: normal;">描述位置和方向："居中放置"、"指向上方"、"朝右"、"在底部"</span>
+            </li>
+            <li style="color: #dc3545; font-weight: 600;">
+                <strong>✂️ Keep it SIMPLE:</strong> Avoid complex sentences. Use short, clear phrases connected by commas.<br/>
+                <span style="color: #666; font-weight: normal;">保持简单：避免复杂句子。使用简短清晰的短语，用逗号连接。</span>
+            </li>
+        </ul>
+    </div>
+    <!-- Parameter Tuning Tips -->
+    <div class="orange-box">
+        <strong>🎛️ Parameter Tuning Guide | 参数调整指南</strong>
+        <table style="width: 100%; margin-top: 10px; border-collapse: collapse;">
+            <tr style="background: rgba(255,255,255,0.5);">
+                <th style="padding: 8px; text-align: left; border-bottom: 1px solid #ddd;">Scenario 场景</th>
+                <th style="padding: 8px; text-align: center; border-bottom: 1px solid #ddd;">Temperature</th>
+                <th style="padding: 8px; text-align: center; border-bottom: 1px solid #ddd;">Top-P</th>
+                <th style="padding: 8px; text-align: center; border-bottom: 1px solid #ddd;">Top-K</th>
+                <th style="padding: 8px; text-align: center; border-bottom: 1px solid #ddd;">Rep. Penalty</th>
+            </tr>
+            <tr>
+                <td style="padding: 8px;">Simple icons/shapes 简单图标</td>
+                <td style="padding: 8px; text-align: center;">0.3 - 0.5</td>
+                <td style="padding: 8px; text-align: center;">0.85 - 0.90</td>
+                <td style="padding: 8px; text-align: center;">40 - 50</td>
+                <td style="padding: 8px; text-align: center;">1.05</td>
+            </tr>
+            <tr style="background: rgba(255,255,255,0.3);">
+                <td style="padding: 8px;">Characters/Avatars 人物/头像</td>
+                <td style="padding: 8px; text-align: center;">0.5 - 0.7</td>
+                <td style="padding: 8px; text-align: center;">0.88 - 0.92</td>
+                <td style="padding: 8px; text-align: center;">50 - 70</td>
+                <td style="padding: 8px; text-align: center;">1.02 - 1.05</td>
+            </tr>
+            <tr>
+                <td style="padding: 8px;">Landscapes/Scenes 风景/场景</td>
+                <td style="padding: 8px; text-align: center;">0.5 - 0.7</td>
+                <td style="padding: 8px; text-align: center;">0.88 - 0.92</td>
+                <td style="padding: 8px; text-align: center;">50 - 60</td>
+                <td style="padding: 8px; text-align: center;">1.03</td>
+            </tr>
+            <tr style="background: rgba(255,255,255,0.3);">
+                <td style="padding: 8px;">Image-to-SVG 图像转SVG</td>
+                <td style="padding: 8px; text-align: center;">0.2 - 0.4</td>
+                <td style="padding: 8px; text-align: center;">0.88 - 0.92</td>
+                <td style="padding: 8px; text-align: center;">40 - 50</td>
+                <td style="padding: 8px; text-align: center;">1.05</td>
+            </tr>
+        </table>
+        <p style="margin: 10px 0 0 0; font-size: 0.9em; color: #856404;">
+            💡 <strong>Tip:</strong> If results are too chaotic, lower temperature. If too simple/empty, raise it slightly.<br/>
+            如果结果太混乱，降低温度。如果太简单/空白，稍微提高。
+        </p>
+    </div>
+    <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(300px, 1fr)); gap: 15px; margin-top: 15px;">
+        <div class="tip-category">
+            <h4>🎯 Icons & Simple Shapes | 图标与简单形状</h4>
+            <p>Use clear geometric descriptions with explicit colors.<br/>
+            <span style="color: #666; font-size: 0.9em;">使用清晰的几何描述和明确的颜色。</span></p>
+            <div class="example-prompt">
+                "A black triangle pointing downward, centrally positioned on white background."<br/>
+                <span style="color: #666;">"黑色三角形，指向下方，居中在白色背景上。"</span>
+            </div>
+            <div class="example-prompt">
+                "A red heart shape with smooth curved edges, centered on white background."<br/>
+                <span style="color: #666;">"红色心形，边缘光滑弯曲，居中在白色背景上。"</span>
+            </div>
+            <p><strong>Keywords:</strong> <code>triangle</code> <code>circle</code> <code>arrow</code> <code>heart</code> <code>star</code> <code>centered</code></p>
+        </div>
+        <div class="tip-category">
+            <h4>👤 Characters & People | 人物角色</h4>
+            <p>Break down into simple geometric parts. Describe each body part with shape + color.<br/>
+            <span style="color: #666; font-size: 0.9em;">分解为简单几何部分。用形状+颜色描述每个身体部位。</span></p>
+            <div class="example-prompt">
+                "A simple person: round beige head, rectangular blue shirt body, two dark gray rectangular legs. Standing pose, arms at sides, flat colors, white background."<br/>
+                <span style="color: #666;">"简单人物：米色圆形头，蓝色矩形衬衫身体，两条深灰矩形腿。站立姿势，双臂下垂，平面颜色，白色背景。"</span>
+            </div>
+            <div class="example-prompt">
+                "A girl with long black hair, pink dress with triangular skirt shape, small circular face with dot eyes and curved smile. Simple cartoon style."<br/>
+                <span style="color: #666;">"长黑发女孩，粉色连衣裙（三角形裙摆），小圆脸配点状眼睛和弯曲微笑。简单卡通风格。"</span>
+            </div>
+            <p class="red-tip">⚠️ Keep poses SIMPLE: standing, sitting, waving. Avoid complex actions!</p>
+        </div>
+        <div class="tip-category">
+            <h4>😊 Avatars & Portraits | 头像与肖像</h4>
+            <p>Use circular frame, focus on face and upper body only.<br/>
+            <span style="color: #666; font-size: 0.9em;">使用圆形框架，只关注脸部和上半身。</span></p>
+            <div class="example-prompt">
+                "Circular avatar: person with short black hair, round face with two dot eyes and small curved smile, wearing blue collar shirt. Minimal style, white background."<br/>
+                <span style="color: #666;">"圆形头像：短黑发人物，圆脸配两个点状眼睛和小弯曲微笑，穿蓝色衬衫领子。极简风格，白色背景。"</span>
+            </div>
+            <div class="example-prompt">
+                "Profile avatar silhouette: black side view of head with short hair, facing right. Simple solid shape on white background."<br/>
+                <span style="color: #666;">"侧面头像剪影：黑色短发头部侧视图，朝右。简单实心形状，白色背景。"</span>
+            </div>
+        </div>
+        <div class="tip-category">
+            <h4>🏔️ Landscapes & Scenes | 风景与场景</h4>
+            <p>Layer elements from background to foreground. Specify color for EACH layer.<br/>
+            <span style="color: #666; font-size: 0.9em;">从背景到前景分层。为每层指定颜色。</span></p>
+            <div class="example-prompt">
+                "Layered landscape: light blue sky at top, gray triangular mountains in middle, dark green triangular pine trees at bottom. Flat colors, simple shapes."<br/>
+                <span style="color: #666;">"分层风景：顶部浅蓝天空，中间灰色三角山脉，底部深绿三角松树。平面颜色，简单形状。"</span>
+            </div>
+            <div class="example-prompt">
+                "Sunset beach: orange gradient sky at top, yellow semicircle sun on horizon, dark blue wavy ocean below, tan beach at bottom."<br/>
+                <span style="color: #666;">"日落海滩：顶部橙色渐变天空，地平线黄色半圆太阳，下方深蓝波浪海洋，底部棕褐色沙滩。"</span>
+            </div>
+            <p class="red-tip">⚠️ Use geometric shapes for nature: triangular trees, wavy water, semicircle sun!</p>
+        </div>
+        <div class="tip-category">
+            <h4>🐱 Animals | 动物</h4>
+            <p>Describe as geometric shapes: oval body, round head, triangular ears, curved tail.<br/>
+            <span style="color: #666; font-size: 0.9em;">描述为几何形状：椭圆身体，圆头，三角耳朵，弯曲尾巴。</span></p>
+            <div class="example-prompt">
+                "Cute cat: orange round head with two triangular ears, oval orange body, curved tail. Simple cartoon style with black outlines, sitting pose, white background."<br/>
+                <span style="color: #666;">"可爱猫咪：橙色圆头配两个三角耳朵，橙色椭圆身体，弯曲尾巴。简单卡通风格，黑色轮廓，坐姿，白色背景。"</span>
+            </div>
+            <div class="example-prompt">
+                "Simple black bird: oval body, small round head, pointed triangular beak facing right, triangular tail, two stick legs. Silhouette style on white."<br/>
+                <span style="color: #666;">"简单黑鸟：椭圆身体，小圆头，尖三角喙朝右，三角尾巴，两条棒状腿。白色背景剪影风格。"</span>
+            </div>
+        </div>
+        <div class="tip-category">
+            <h4>🏠 Buildings & Objects | 建筑与物体</h4>
+            <p>Use basic shapes: rectangles for walls, triangles for roofs, squares for windows.<br/>
+            <span style="color: #666; font-size: 0.9em;">使用基本形状：矩形墙壁，三角屋顶，方形窗户。</span></p>
+            <div class="example-prompt">
+                "Simple house: red triangular roof on top, beige rectangular wall, brown rectangular door in center, two small blue square windows. Green ground at bottom, white background."<br/>
+                <span style="color: #666;">"简单房屋：顶部红色三角屋顶，米色矩形墙壁，中间棕色矩形门，两个小蓝色方形窗户。底部绿色地面，白色背景。"</span>
+            </div>
+            <div class="example-prompt">
+                "Coffee mug: brown cylindrical cup shape with curved handle on right side, three wavy steam lines rising from top. Simple flat style on white."<br/>
+                <span style="color: #666;">"咖啡杯：棕色圆柱杯身，右侧弯曲把手，顶部三条波浪蒸汽线上升。简单平面风格，白色背景。"</span>
+            </div>
+        </div>
+    </div>
+    <!-- Extended Examples Section -->
+    <div style="margin-top: 20px; padding: 15px; background: #f0f7ff; border-radius: 10px; border: 1px solid #cce5ff;">
+        <h4 style="margin-top: 0; color: #0066cc;">🎨 More Complex Examples (Generate 6-8 candidates!) | 更多复杂示例（请生成6-8个候选！）</h4>
+        <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(280px, 1fr)); gap: 12px; margin-top: 15px;">
+            <div class="example-prompt">
+                <strong>👨‍💼 Business Avatar:</strong><br/>
+                "Circular professional avatar: man with short black hair, neutral skin tone round face, wearing dark navy suit with white shirt collar visible. Clean minimal style, centered in circle, white background."
+            </div>
+            <div class="example-prompt">
+                <strong>👩 Female Portrait:</strong><br/>
+                "Simple female face: oval face shape, long brown wavy hair on sides, two dot eyes, small nose, curved smile lips. Pink blush on cheeks. Cartoon portrait style, white background."
+            </div>
+            <div class="example-prompt">
+                <strong>🧒 Child Character:</strong><br/>
+                "Cute child standing: large round head with short brown hair, big circular eyes with white highlights, small body in red t-shirt and blue shorts, simple stick arms and legs. Cheerful cartoon style."
+            </div>
+            <div class="example-prompt">
+                <strong>🏃 Active Pose:</strong><br/>
+                "Person walking: side view, circular head, rectangular torso in green jacket, legs in walking position (one forward, one back). Simple geometric style, moving right, white background."
+            </div>
+            <div class="example-prompt">
+                <strong>🌲 Forest Scene:</strong><br/>
+                "Simple forest: light blue sky, row of 5 dark green triangular pine trees of varying heights, brown rectangular trunks, light green grass strip at bottom. Layered flat design."
+            </div>
+            <div class="example-prompt">
+                <strong>🌊 Ocean View:</strong><br/>
+                "Minimalist ocean: gradient blue sky at top, three horizontal wavy lines in dark blue for ocean, small white sailboat with triangular sail in center. Clean vector style."
+            </div>
+            <div class="example-prompt">
+                <strong>🌆 City Skyline:</strong><br/>
+                "Simple city skyline: orange sunset sky gradient, row of black rectangular building silhouettes of different heights, some with small yellow square windows. Minimalist style."
+            </div>
+            <div class="example-prompt">
+                <strong>🐕 Dog Character:</strong><br/>
+                "Friendly cartoon dog: brown oval body, round head with floppy ears, black dot nose, curved tail pointing up, four short legs. Sitting pose facing forward, white background."
+            </div>
+        </div>
+    </div>
+    <!-- Quick Troubleshooting -->
+    <div class="green-box" style="margin-top: 15px;">
+        <strong>⚡ Quick Troubleshooting | 快速故障排除</strong>
+        <ul style="margin: 8px 0 0 0; padding-left: 20px;">
+            <li><strong>Messy/chaotic? 混乱？</strong> → Lower temperature to 0.3-0.4, simplify description, reduce top_k</li>
+            <li><strong>Too simple/empty? 太简单？</strong> → Raise temperature to 0.5-0.6, add more shape details</li>
+            <li><strong>Wrong colors? 颜色错误？</strong> → Explicitly name EVERY color: "red roof", "blue shirt", "black outline"</li>
+            <li><strong>Missing elements? 元素缺失？</strong> → Add position words: "at top", "in center", "at bottom left"</li>
+            <li><strong>Repetitive patterns? 重复图案？</strong> → Increase repetition_penalty to 1.08-1.15</li>
+            <li><strong>Inconsistent? 不一致？</strong> → <span class="red-tip">Generate MORE candidates (6-8) and pick the best!</span></li>
+        </ul>
+    </div>
+    <!-- Prompt Template -->
+    <div style="margin-top: 15px; padding: 12px; background: #e8f5e9; border-radius: 8px; border-left: 4px solid #4caf50;">
+        <strong>✅ Recommended Prompt Structure | 推荐提示词结构</strong>
+        <div style="background: white; padding: 10px; border-radius: 6px; margin-top: 8px; font-family: monospace; font-size: 0.9em;">
+            [Subject] + [Shape descriptions with colors] + [Position/orientation] + [Style] + [Background]
+        </div>
+        <p style="margin: 10px 0 0 0; color: #2e7d32; font-size: 0.95em;">
+            ✓ "A fox logo: triangular orange head, pointed ears, white chest marking, facing right. Minimalist flat style, centered on white background."
+        </p>
+    </div>
+</div>
+"""
# Bilingual (English / Chinese) tips banner for the Image-to-SVG workflow.
# create_interface() passes this markup to gr.HTML() at the top of the
# Image-to-SVG tab.
# NOTE(review): the "red-box" class is presumably styled by CUSTOM_CSS — confirm.
IMAGE_TIPS_HTML = """
<div class="red-box">
    <strong>🔴 Image-to-SVG Tips | 图片转SVG技巧</strong>
    <ul style="margin: 8px 0 0 0; padding-left: 20px;">
        <li><strong>Best input: Simple images with white/transparent background</strong><br/>
            <span style="color: #666;">最佳输入：白色或透明背景的简单图片</span></li>
        <li><strong>PNG with transparency (RGBA) works best!</strong> We auto-convert to white background.<br/>
            <span style="color: #666;">透明背景的PNG效果最好！我们会自动转换为白色背景。</span></li>
        <li><strong>For complex backgrounds:</strong> Enable "Replace Background" option below.<br/>
            <span style="color: #666;">复杂背景图片：启用下方的"替换背景"选项。</span></li>
        <li><strong>Lower temperature (0.2-0.4)</strong> for more accurate reproduction.<br/>
            <span style="color: #666;">较低温度(0.2-0.4)可获得更准确的复制效果。</span></li>
        <li style="color: #dc3545; font-weight: 600;"><strong>Generate 4-8 candidates!</strong> Pick the one that best matches your input.<br/>
            <span style="color: #666; font-weight: normal;">生成4-8个候选！选择最匹配输入的那个。</span></li>
    </ul>
</div>
"""
 def parse_args():
     parser = argparse.ArgumentParser(description='SVG Generator Service')
     parser.add_argument('--port', type=int, default=7860)
     parser.add_argument('--share', action='store_true')
     parser.add_argument('--debug', action='store_true')
+    parser.add_argument('--weight_path', type=str, default="/mnt/jfs-test/OmniSVG_result/8B_1126/1688_bs_4/merge_slerp/merge_150_350_bf16")
+    parser.add_argument('--model_path', type=str, default="/mnt/jfs-test/Qwen2.5-VL-7B-Instruct")
     return parser.parse_args()
def load_models(weight_path, model_path):
    """Load the tokenizer, processor, decoder and SVG tokenizer into module globals.

    Args:
        weight_path: Directory expected to contain ``pytorch_model.bin`` with
            the decoder's state dict.
        model_path: Path/identifier handed to ``AutoTokenizer``,
            ``AutoProcessor`` and ``SketchDecoder``.

    Raises:
        FileNotFoundError: If ``pytorch_model.bin`` does not exist under
            ``weight_path``. This is checked before anything else so a bad
            path fails immediately instead of after the (slow) base-model
            construction.
    """
    global tokenizer, processor, sketch_decoder, svg_tokenizer

    # Fail fast: validate the checkpoint location before any heavy loading.
    bin_path = os.path.join(weight_path, "pytorch_model.bin")
    if not os.path.exists(bin_path):
        raise FileNotFoundError(f"No weights found at {bin_path}")

    print(f"Loading models from {model_path}...")
    print(f"Using precision: {DTYPE}")

    # Left padding on both the tokenizer and the processor's own tokenizer.
    tokenizer = AutoTokenizer.from_pretrained(model_path, padding_side="left")
    processor = AutoProcessor.from_pretrained(model_path, padding_side="left")
    processor.tokenizer.padding_side = "left"

    decoder = SketchDecoder(
        pix_len=config['model']['max_length'],
        text_len=200,
        model_path=model_path,
        torch_dtype=DTYPE
    )

    print(f"Loading weights from: {bin_path}")
    # NOTE(review): torch.load unpickles the file; only point weight_path at
    # trusted checkpoints (consider weights_only=True if the torch version
    # in use supports it).
    decoder.load_state_dict(torch.load(bin_path, map_location='cpu'))

    # Publish the global only once the weights are in place, so a failed load
    # never leaves an unweighted decoder visible to request handlers.
    sketch_decoder = decoder.to(device).eval()
    svg_tokenizer = SVGTokenizer('./config.yaml')
    print("All models loaded successfully!")
def detect_text_subtype(text_prompt):
    """Classify a text prompt as ``"icon"`` or ``"illustration"``.

    Matching is plain case-insensitive substring search. Icon keywords take
    priority; otherwise any illustration keyword, or a prompt longer than
    50 characters, yields ``"illustration"``. Everything else is ``"icon"``.
    """
    icon_terms = (
        'icon', 'logo', 'symbol', 'badge', 'button', 'emoji', 'glyph', 'simple',
        'arrow', 'triangle', 'circle', 'square', 'heart', 'star', 'checkmark',
    )
    scene_terms = (
        'illustration', 'scene', 'person', 'people', 'character', 'man', 'woman', 'boy', 'girl',
        'avatar', 'portrait', 'face', 'head', 'body',
        'cat', 'dog', 'bird', 'animal', 'pet', 'fox', 'rabbit',
        'sitting', 'standing', 'walking', 'running', 'sleeping', 'holding', 'playing',
        'house', 'building', 'tree', 'garden', 'landscape', 'mountain', 'forest', 'city',
        'ocean', 'beach', 'sunset', 'sunrise', 'sky',
    )
    lowered = text_prompt.lower()

    def mentions_any(terms):
        # True when at least one term occurs anywhere in the lowered prompt.
        return any(term in lowered for term in terms)

    if mentions_any(icon_terms):
        return "icon"
    if mentions_any(scene_terms) or len(text_prompt) > 50:
        return "illustration"
    return "icon"
def detect_and_replace_background(image, threshold=240, edge_sample_ratio=0.1, color_tolerance=30):
    """
    Give the image a white background when it does not already have one.

    RGBA input is alpha-composited onto white. Otherwise a sample of border
    pixels is averaged; if every channel of that average exceeds
    ``threshold`` the image is returned untouched. If not, the most frequent
    border colour is taken as the background and every pixel within
    ``color_tolerance`` (Euclidean RGB distance) of it is painted white.
    Args:
        image: PIL Image (RGB or RGBA)
        threshold: Pixel values above this are considered "white"
        edge_sample_ratio: Ratio of edge pixels to sample for the white check
        color_tolerance: Max RGB distance from the detected background colour
            for a pixel to be repainted (default matches the previous
            hard-coded value of 30)
    Returns:
        tuple: (processed_image, background_was_replaced)
    """
    # Alpha channel present: flatten onto white, no colour guessing needed.
    if image.mode == 'RGBA':
        bg = Image.new('RGBA', image.size, (255, 255, 255, 255))
        composite = Image.alpha_composite(bg, image)
        return composite.convert('RGB'), True

    img_array = np.array(image)
    h, w = img_array.shape[:2]
    if h == 0 or w == 0:
        # Nothing to inspect on a degenerate image.
        return image, False

    # Evenly spaced samples along all four borders.
    sample_count = max(10, int(min(h, w) * edge_sample_ratio))
    col_step = max(1, w // sample_count)
    row_step = max(1, h // sample_count)
    edge_pixels = np.concatenate([
        img_array[0, ::col_step],
        img_array[h - 1, ::col_step],
        img_array[::row_step, 0],
        img_array[::row_step, w - 1],
    ])

    # Border already white-ish: keep the original object.
    if np.all(edge_pixels.mean(axis=0) > threshold):
        return image, False

    # Colour replacement only makes sense for arrays with >= 3 channels.
    if img_array.ndim != 3 or img_array.shape[2] < 3:
        return image, False

    from collections import Counter

    rgb = img_array[:, :, :3]
    # Interleave top/bottom and then left/right so the colours are counted in
    # the same order as a pixel-by-pixel walk (keeps tie-breaking stable).
    border_colors = np.concatenate([
        np.stack([rgb[0], rgb[h - 1]], axis=1).reshape(-1, 3),
        np.stack([rgb[:, 0], rgb[:, w - 1]], axis=1).reshape(-1, 3),
    ])
    bg_color = Counter(map(tuple, border_colors.tolist())).most_common(1)[0][0]

    # Mask every pixel whose colour is close to the detected background.
    color_diff = np.sqrt(np.sum((rgb.astype(float) - np.array(bg_color, dtype=float)) ** 2, axis=2))
    bg_mask = color_diff < color_tolerance

    result = img_array.copy()
    result[bg_mask] = 255  # all channels (incl. a 4th one, if any) set to 255
    return Image.fromarray(result).convert('RGB'), True
def preprocess_image_for_svg(image, replace_background=True, target_size=448):
    """
    Preprocess image for SVG generation.

    Transparent inputs (RGBA, LA, PA, and palette images carrying a
    ``transparency`` entry) are composited onto white; any other non-RGB
    mode is converted to RGB. The result is optionally passed through
    ``detect_and_replace_background`` and finally resized to a square.
    Args:
        image: Input PIL Image or path
        replace_background: Whether to replace non-white backgrounds
        target_size: Target size for resizing
    Returns:
        tuple: (processed_pil_image, was_modified)
    """
    # Load image if path; copy so the underlying file can be closed right away.
    if isinstance(image, str):
        with Image.open(image) as opened:
            raw_img = opened.copy()
    else:
        raw_img = image

    was_modified = False
    # Palette images signal transparency through info['transparency'] rather
    # than through their mode, so they need an explicit check.
    has_transparency = raw_img.mode in ('RGBA', 'LA', 'PA') or (
        raw_img.mode == 'P' and 'transparency' in raw_img.info
    )
    if has_transparency:
        rgba = raw_img if raw_img.mode == 'RGBA' else raw_img.convert('RGBA')
        white = Image.new('RGBA', rgba.size, (255, 255, 255, 255))
        img_with_bg = Image.alpha_composite(white, rgba).convert('RGB')
        was_modified = True
    elif raw_img.mode != 'RGB':
        img_with_bg = raw_img.convert('RGB')
    else:
        img_with_bg = raw_img

    # Optionally detect and replace non-white background
    if replace_background:
        img_with_bg, bg_replaced = detect_and_replace_background(img_with_bg)
        was_modified = was_modified or bg_replaced

    # Resize to target size (aspect ratio is not preserved)
    img_resized = img_with_bg.resize((target_size, target_size), Image.Resampling.LANCZOS)
    return img_resized, was_modified
def prepare_inputs(task_type, content):
    """Build processor inputs for one generation request.

    For ``"text-to-svg"`` the content is the prompt text; for any other task
    type it is treated as the image reference placed in the chat message.
    Returns whatever the module-level ``processor`` returns.
    """
    is_text_task = task_type == "text-to-svg"

    if is_text_task:
        prompt_text = str(content).strip()
        instruction = f"""Generate an SVG illustration for: {prompt_text}
Requirements:
- Create complete SVG path commands
- Include proper coordinates and colors
- Maintain visual clarity and composition"""
        user_content = [{"type": "text", "text": instruction}]
    else:  # image-to-svg
        user_content = [
            {"type": "text", "text": "Generate SVG code that accurately represents this image:"},
            {"type": "image", "image": content},
        ]

    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": user_content},
    ]
    text_input = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

    processor_kwargs = {
        "text": [text_input],
        "padding": True,
        "truncation": True,
        "return_tensors": "pt",
    }
    if not is_text_task:
        # Only the image task extracts vision inputs from the messages.
        image_inputs, _ = process_vision_info(messages)
        processor_kwargs["images"] = image_inputs

    inputs = processor(**processor_kwargs)
    return inputs
def render_svg_to_image(svg_str, size=512):
    """Rasterise an SVG string to a ``size`` x ``size`` RGB image on white.

    Returns the PIL image, or ``None`` if anything goes wrong while
    rendering (the error is printed, never raised).
    """
    try:
        raster_bytes = cairosvg.svg2png(
            bytestring=svg_str.encode('utf-8'),
            output_width=size,
            output_height=size,
        )
        rendered = Image.open(io.BytesIO(raster_bytes)).convert("RGBA")
        # Flatten transparency by pasting through the alpha band onto white.
        canvas = Image.new("RGB", rendered.size, (255, 255, 255))
        alpha_band = rendered.split()[3]
        canvas.paste(rendered, mask=alpha_band)
    except Exception as e:
        print(f"Render error: {e}")
        return None
    return canvas
def create_gallery_html(candidates, cols=4):
    """Render candidate SVGs as an HTML grid with ``cols`` columns.

    Each candidate is a dict with at least ``'svg'`` and ``'path_count'``.
    An empty/None candidate list yields a placeholder message instead.
    """
    if not candidates:
        return '<div style="text-align:center;color:#999;padding:50px;">No candidates generated / 未生成候选</div>'

    def render_card(rank, cand):
        # One gallery tile: the inline SVG plus a "#rank | N paths" caption.
        svg_str = cand['svg']
        if 'viewBox' not in svg_str:
            # Without a viewBox the inline SVG would not scale into the tile.
            svg_str = svg_str.replace('<svg', f'<svg viewBox="0 0 {TARGET_IMAGE_SIZE} {TARGET_IMAGE_SIZE}"', 1)
        path_total = cand['path_count']
        return f'''
        <div style="
            background: white;
            border: 1px solid #ddd;
            border-radius: 8px;
            padding: 10px;
            text-align: center;
            transition: transform 0.2s, box-shadow 0.2s;
            cursor: pointer;
        " onmouseover="this.style.transform='scale(1.02)';this.style.boxShadow='0 4px 12px rgba(0,0,0,0.15)';"
           onmouseout="this.style.transform='scale(1)';this.style.boxShadow='none';">
            <div style="width: 180px; height: 180px; margin: 0 auto; display: flex; justify-content: center; align-items: center; overflow: hidden;">
                {svg_str}
            </div>
            <div style="margin-top: 8px; font-size: 12px; color: #666;">
                #{rank} | {path_total} paths
            </div>
        </div>
        '''

    cards = ''.join(render_card(rank, cand) for rank, cand in enumerate(candidates, start=1))
    return f'''
    <div style="
        display: grid;
        grid-template-columns: repeat({cols}, 1fr);
        gap: 15px;
        padding: 15px;
        background: #fafafa;
        border-radius: 12px;
    ">
        {cards}
    </div>
    '''
def is_valid_candidate(svg_str, img, subtype="illustration"):
    """Decide whether a generated SVG and its rendering are usable.

    Returns ``(ok, reason)`` where reason is one of ``"too_short"``,
    ``"no_svg_tag"``, ``"render_failed"``, ``"empty_image"`` or ``"ok"``.
    A rendering counts as empty when its mean pixel value exceeds 250 for
    illustrations and 252 for every other subtype.
    """
    if not svg_str or len(svg_str) < 20:
        verdict = (False, "too_short")
    elif '<svg' not in svg_str:
        verdict = (False, "no_svg_tag")
    elif img is None:
        verdict = (False, "render_failed")
    else:
        blank_cutoff = {"illustration": 250}.get(subtype, 252)
        brightness = np.array(img).mean()
        verdict = (False, "empty_image") if brightness > blank_cutoff else (True, "ok")
    return verdict
def _decode_candidate(token_ids, subtype):
    """Convert one row of generated token ids into ``(svg_str, png_image, num_paths)``.

    Returns ``None`` when the tokens decode to nothing, produce no paths, or
    the rendered result fails ``is_valid_candidate``.
    """
    # The SVG tokenizer is fed the sequence wrapped in BOS ... EOS.
    wrapped = torch.cat([
        torch.full((1, 1), config['model']['bos_token_id'], device=device),
        token_ids,
        torch.full((1, 1), config['model']['eos_token_id'], device=device)
    ], dim=1)
    generated_xy = svg_tokenizer.process_generated_tokens(wrapped)
    if len(generated_xy) == 0:
        return None

    svg_tensors, color_tensors = svg_tokenizer.raster_svg(generated_xy)
    if not svg_tensors or not svg_tensors[0]:
        return None

    # Paths without a generated colour fall back to black.
    num_paths = len(svg_tensors[0])
    while len(color_tensors) < num_paths:
        color_tensors.append(BLACK_COLOR_TOKEN)

    svg_str = svg_tokenizer.apply_colors_to_svg(svg_tensors[0], color_tensors).to_str()
    if 'width=' not in svg_str:
        svg_str = svg_str.replace('<svg', f'<svg width="{TARGET_IMAGE_SIZE}" height="{TARGET_IMAGE_SIZE}"', 1)

    png_image = render_svg_to_image(svg_str, size=512)
    is_valid, reason = is_valid_candidate(svg_str, png_image, subtype)
    if not is_valid:
        print(f"  Candidate rejected: {reason}")
        return None
    return svg_str, png_image, num_paths


def generate_candidates(inputs, task_type, subtype, temperature, top_p, top_k, repetition_penalty,
                        max_length, num_samples, progress_callback=None, oversample=4):
    """Sample SVG token sequences and return the valid, rendered candidates.

    Args:
        inputs: Processor output; ``input_ids`` and ``attention_mask`` are
            required, ``pixel_values`` / ``image_grid_thw`` are forwarded when
            present.
        task_type: Accepted for interface compatibility; not used here.
        subtype: Forwarded to ``is_valid_candidate`` (controls the blank-image
            threshold).
        temperature, top_p, top_k, repetition_penalty: Sampling parameters.
        max_length: Passed to ``generate`` as ``max_new_tokens``.
        num_samples: Number of valid candidates wanted.
        progress_callback: Optional ``callback(fraction, message)``.
        oversample: Extra sequences sampled beyond ``num_samples`` so that
            invalid ones can be discarded (default 4).

    Returns:
        List of dicts with keys ``svg``, ``img``, ``path_count`` and ``index``
        (1-based), at most ``num_samples`` long. Generation errors are printed
        and result in a shorter (possibly empty) list rather than an exception.
    """
    def report(fraction, message):
        if progress_callback:
            progress_callback(fraction, message)

    input_ids = inputs['input_ids'].to(device)
    attention_mask = inputs['attention_mask'].to(device)
    model_inputs = {
        "input_ids": input_ids,
        "attention_mask": attention_mask
    }
    if 'pixel_values' in inputs:
        model_inputs["pixel_values"] = inputs['pixel_values'].to(device, dtype=DTYPE)
    if 'image_grid_thw' in inputs:
        model_inputs["image_grid_thw"] = inputs['image_grid_thw'].to(device)

    all_candidates = []
    # Generation config with user parameters
    gen_config = {
        'do_sample': True,
        'temperature': temperature,
        'top_p': top_p,
        'top_k': int(top_k),
        'repetition_penalty': repetition_penalty,
        'early_stopping': True,
        'no_repeat_ngram_size': 0,
        'eos_token_id': config['model']['eos_token_id'],
        'pad_token_id': config['model']['pad_token_id'],
        'bos_token_id': config['model']['bos_token_id'],
    }
    actual_samples = num_samples + oversample

    try:
        report(0.1, "Waiting for model access / 等待模型访问...")
        # Only the model call itself happens while holding generation_lock.
        with generation_lock:
            report(0.15, "Generating SVG tokens / 生成SVG令牌...")
            with torch.no_grad():
                results = sketch_decoder.transformer.generate(
                    **model_inputs,
                    max_new_tokens=max_length,
                    num_return_sequences=actual_samples,
                    use_cache=True,
                    **gen_config
                )
                # Drop the prompt tokens; keep only what was generated.
                input_len = input_ids.shape[1]
                generated_ids_batch = results[:, input_len:]

        report(0.5, "Processing generated tokens / 处理生成的令牌...")
        for i in range(min(actual_samples, generated_ids_batch.shape[0])):
            try:
                decoded = _decode_candidate(generated_ids_batch[i:i+1], subtype)
            except Exception as e:
                # One bad sequence must not abort the remaining ones.
                print(f"  Candidate {i} error: {e}")
                continue
            if decoded is None:
                continue

            svg_str, png_image, num_paths = decoded
            all_candidates.append({
                'svg': svg_str,
                'img': png_image,
                'path_count': num_paths,
                'index': len(all_candidates) + 1
            })
            report(0.5 + 0.4 * (i / actual_samples),
                   f"Found {len(all_candidates)} valid / 找到 {len(all_candidates)} 个有效...")
            if len(all_candidates) >= num_samples:
                break
    except Exception as e:
        print(f"Generation Error: {e}")
        import traceback
        traceback.print_exc()

    report(0.95, f"Generated {len(all_candidates)} valid / 生成了 {len(all_candidates)} 个有效")
    return all_candidates
 @spaces.GPU
+def gradio_text_to_svg(text_description, num_candidates, temperature, top_p, top_k, repetition_penalty,
+                       progress=gr.Progress()):
+    """Gradio interface - text-to-svg with advanced parameters"""
+    if not text_description or text_description.strip() == "":
+        return '<div style="text-align:center;color:#999;padding:50px;">Please enter a description / 请输入描述</div>', ""
+    progress(0, "Starting generation / 开始生成...")
+    gc.collect()
+    if torch.cuda.is_available():
+        torch.cuda.empty_cache()
+    start_time = time.time()
+    subtype = detect_text_subtype(text_description)
+    progress(0.05, f"Detected: {subtype} / 检测到: {subtype}")
+    inputs = prepare_inputs("text-to-svg", text_description.strip())
+    max_length = config['model']['max_length']
+    def update_progress(val, msg):
+        progress(val, msg)
+    all_candidates = generate_candidates(
+        inputs, "text-to-svg", subtype,
+        temperature, top_p, int(top_k), repetition_penalty,
+        max_length, int(num_candidates),
+        progress_callback=update_progress
+    )
+    elapsed = time.time() - start_time
+    if not all_candidates:
+        return (
+            '<div style="text-align:center;color:#999;padding:50px;">No valid SVG generated. Try different parameters or rephrase your prompt.<br/>未生成有效的SVG。请尝试不同参数或重新描述。</div>',
+            f"<!-- No valid SVG (took {elapsed:.1f}s) -->"
+        )
+    svg_codes = []
+    for i, cand in enumerate(all_candidates):
+        svg_codes.append(f"<!-- ====== Candidate {i+1} | Paths: {cand['path_count']} ====== -->\n{cand['svg']}")
+    combined_svg = "\n\n".join(svg_codes)
+    gallery_html = create_gallery_html(all_candidates)
+    progress(1.0, f"Done! {len(all_candidates)} candidates in {elapsed:.1f}s / 完成！{len(all_candidates)} 个候选，{elapsed:.1f}秒")
+    return gallery_html, combined_svg
@spaces.GPU
def gradio_image_to_svg(image, num_candidates, temperature, top_p, top_k, repetition_penalty,
                        replace_background, progress=gr.Progress()):
    """Gradio handler: generate SVG candidates that reproduce an uploaded image.

    The image is preprocessed (transparency flattened, optional background
    replacement, resize), written to a temporary PNG that is handed to
    ``prepare_inputs``, and the temp file is always removed afterwards.

    Returns:
        ``(gallery_html, combined_svg_source, processed_image)``; the third
        element is ``None`` when no image was supplied.
    """
    if image is None:
        return (
            '<div style="text-align:center;color:#999;padding:50px;">Please upload an image / 请上传图片</div>',
            "",
            None
        )
    progress(0, "Processing input image / 处理输入图片...")
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    start_time = time.time()

    # Preprocess image with optional background replacement
    img_processed, was_modified = preprocess_image_for_svg(
        image,
        replace_background=replace_background,
        target_size=TARGET_IMAGE_SIZE
    )
    if was_modified:
        progress(0.05, "Background processed / 背景已处理")

    tmp_path = None
    try:
        # Creating and writing the temp file happens inside the try so that a
        # failed save cannot leave the delete=False file behind.
        with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp_file:
            tmp_path = tmp_file.name
            img_processed.save(tmp_file, format='PNG')

        progress(0.1, "Preparing model inputs / 准备模型输入...")
        inputs = prepare_inputs("image-to-svg", tmp_path)
        max_length = config['model']['max_length']

        def update_progress(val, msg):
            progress(val, msg)

        all_candidates = generate_candidates(
            inputs, "image-to-svg", "image",
            temperature, top_p, int(top_k), repetition_penalty,
            max_length, int(num_candidates),
            progress_callback=update_progress
        )
        elapsed = time.time() - start_time
        if not all_candidates:
            return (
                '<div style="text-align:center;color:#999;padding:50px;">No valid SVG generated. Try adjusting parameters.<br/>未生成有效的SVG。请尝试调整参数。</div>',
                f"<!-- No valid SVG (took {elapsed:.1f}s) -->",
                img_processed
            )

        # One commented section per candidate, separated by blank lines.
        combined_svg = "\n\n".join(
            f"<!-- ====== Candidate {i} | Paths: {cand['path_count']} ====== -->\n{cand['svg']}"
            for i, cand in enumerate(all_candidates, start=1)
        )
        gallery_html = create_gallery_html(all_candidates)
        progress(1.0, f"Done! {len(all_candidates)} candidates in {elapsed:.1f}s")
        return gallery_html, combined_svg, img_processed
    finally:
        if tmp_path is not None and os.path.exists(tmp_path):
            os.unlink(tmp_path)
def get_example_images():
    """Return the sorted paths of supported image files found in ``./examples``.

    An empty list is returned when the directory does not exist.
    """
    example_dir = "./examples"
    if not os.path.exists(example_dir):
        return []
    matches = [
        path
        for ext in SUPPORTED_FORMATS
        for path in glob.glob(os.path.join(example_dir, f"*{ext}"))
    ]
    matches.sort()
    return matches
 def create_interface():
+    """Create Gradio interface"""
+    # 30 Example prompts covering various categories
     example_texts = [
+        # === Simple Icons (1-6) ===
+        "A black triangle pointing downward, centrally positioned on white background.",
+        "A red heart shape with smooth curved edges, centered on white background.",
+        "A yellow star with five sharp points, simple geometric design, flat color on white background.",
+        "A blue arrow pointing to the right, thick solid shape, centered on white background.",
+        "A green circle with a white checkmark inside, centered on white background.",
+        "A black plus sign with equal length arms, thick lines, centered on white background.",
+        # === Characters & People (7-12) ===
+        "A simple person standing: round beige head, rectangular blue shirt body, two dark gray rectangular legs, arms at sides. Flat colors, white background.",
+        "A girl with long black hair, wearing pink dress with triangular skirt, small circular face with dot eyes and curved smile. Simple cartoon style, white background.",
+        "A businessman: circular head with short black hair, rectangular dark navy suit body, straight standing pose. Professional minimal style, white background.",
+        "A child waving: large round head with brown messy hair, big circular eyes, small body in red t-shirt and blue shorts, one arm raised. Cheerful cartoon style.",
+        "A person sitting on chair: side view, round head, rectangular torso in green sweater, bent legs on simple chair shape. Relaxed pose, white background.",
+        "A running person: side view silhouette in black, dynamic pose with one leg forward, arms pumping. Motion style, white background.",
+        # === Avatars & Portraits (13-17) ===
+        "Circular avatar: person with short black hair, round face with two dot eyes and small curved smile, wearing blue collar shirt. Minimal style, centered in circle.",
+        "Female avatar: oval face with long wavy brown hair, simple eyes, pink lips, wearing v-neck purple top. Soft cartoon style in circular frame.",
+        "Profile silhouette avatar: black side view of head with short hair and glasses outline, facing right. Simple solid shape on white.",
+        "Cute cartoon avatar: round face with big sparkly eyes, rosy cheeks, short bob haircut in orange. Kawaii style, circular frame.",
+        "Professional headshot avatar: person with neat hair, neutral expression, wearing suit collar. Corporate minimal style, circular frame, white background.",
+        # === Landscapes & Scenes (18-23) ===
+        "Layered mountain landscape: light blue sky at top, gray triangular snow-capped mountains in middle, dark green triangular pine trees at bottom. Flat colors.",
+        "Sunset beach scene: orange gradient sky at top, yellow semicircle sun on horizon, dark blue wavy ocean, tan beach strip at bottom. Simple shapes.",
+        "Forest scene: light blue sky, row of 5 dark green triangular pine trees of varying heights on brown trunks, light green grass at bottom.",
+        "City skyline at dusk: purple-orange gradient sky, row of black rectangular building silhouettes of different heights, some with yellow window squares.",
+        "Desert landscape: light orange sky with white circle sun, tan sand dunes as curved shapes, one green cactus with arms on the right side.",
+        "Countryside scene: blue sky with white fluffy clouds, green rolling hills, small red barn with white door in the center, yellow hay bales.",
+        # === Animals (24-27) ===
+        "Cute orange cat sitting: round head with two triangular ears, oval body, curved tail. Black outline cartoon style, facing forward, white background.",
+        "Simple black bird: oval body, round head, pointed triangular beak facing right, triangular tail, two stick legs. Silhouette style on white.",
+        "Friendly cartoon dog: brown oval body, round head with floppy ears, black dot nose, wagging curved tail, four short legs. Sitting pose.",
+        "Red fox logo: triangular orange face with pointed ears, white chest marking, bushy tail. Minimalist style, facing right, centered on white.",
+        # === Objects & Misc (28-30) ===
+        "Simple house icon: red triangular roof, beige rectangular walls, brown door in center, two blue square windows, green ground at bottom.",
+        "Coffee mug: brown cylindrical cup with curved handle on right, three wavy steam lines rising from top. Flat style on white background.",
+        "Open book: two rectangular white pages spread open, black text lines on each page, brown spine in center. Simple top-down view."
     ]
     example_images = get_example_images()
+    with gr.Blocks(title="OmniSVG Generator", theme=gr.themes.Soft(), css=CUSTOM_CSS) as demo:
+        # Header
+        gr.HTML("""
+        <div class="header-container">
+            <h1>🎨 OmniSVG Generator</h1>
+            <p>Transform images and text descriptions into scalable vector graphics</p>
+            <p style="font-size: 0.9em; opacity: 0.8;">将图像和文本描述转换为可缩放矢量图形</p>
+        </div>
+        """)
+        # Queue status
+        gr.HTML("""
+        <div style="background: #e7f3ff; border: 1px solid #b3d7ff; border-radius: 8px; padding: 12px 15px; margin-bottom: 15px;">
+            <span style="font-size: 1.5em;">ℹ️</span>
+            <strong>Queue System Active</strong> - Requests processed one at a time. Please wait patiently if busy.<br/>
+            <span style="color: #666;">队列系统已启用 - 请求按顺序处理，繁忙时请耐心等待。</span>
+        </div>
+        """)
+        # Tips section
+        gr.HTML(TIPS_HTML)
         with gr.Tabs():
+            # ==================== Image-to-SVG Tab ====================
+            with gr.TabItem("🖼️ Image-to-SVG", id="image-tab"):
+                gr.HTML(IMAGE_TIPS_HTML)
+                with gr.Row(equal_height=False):
+                    with gr.Column(scale=1, min_width=300):
+                        gr.Markdown("### 📤 Upload Image / 上传图片")
+                        image_input = gr.Image(
+                            label="Drag, upload, or Ctrl+V to paste / 拖拽、上传或Ctrl+V粘贴",
+                            type="pil",
+                            image_mode="RGBA",
+                            height=250,
+                            sources=["upload", "clipboard"],
+                            elem_classes=["input-image"]
+                        )
+                        with gr.Group(elem_classes=["settings-group"]):
+                            gr.Markdown("### ⚙️ Settings / 设置")
+                            img_num_candidates = gr.Slider(
+                                minimum=1, maximum=8, value=4, step=1,
+                                label="Number of Candidates / 候选数量"
+                            )
+                            img_replace_bg = gr.Checkbox(
+                                label="Replace non-white background / 替换非白色背景",
+                                value=True,
+                                info="Enable for images with colored backgrounds / 对有色背景图片启用"
+                            )
+                            with gr.Accordion("🔧 Advanced Parameters / 高级参数", open=False):
+                                img_temperature = gr.Slider(
+                                    minimum=0.1, maximum=1.0, value=0.3, step=0.05,
+                                    label="Temperature (Lower=accurate)",
+                                    info="0.2-0.4 recommended / 建议0.2-0.4"
+                                )
+                                img_top_p = gr.Slider(
+                                    minimum=0.5, maximum=1.0, value=0.90, step=0.02,
+                                    label="Top-P"
+                                )
+                                img_top_k = gr.Slider(
+                                    minimum=10, maximum=100, value=50, step=5,
+                                    label="Top-K"
+                                )
+                                img_rep_penalty = gr.Slider(
+                                    minimum=1.0, maximum=1.3, value=1.05, step=0.01,
+                                    label="Repetition Penalty"
+                                )
+                        image_generate_btn = gr.Button(
+                            "🚀 Generate SVG / 生成SVG",
+                            variant="primary",
+                            size="lg",
+                            elem_classes=["primary-btn"]
+                        )
                         if example_images:
+                            gr.Markdown("### 📁 Examples")
+                            gr.Examples(examples=example_images, inputs=[image_input], label="")
+                    with gr.Column(scale=2, min_width=500):
+                        gr.Markdown("### 📥 Processed Input / 处理后输入")
+                        image_processed = gr.Image(label="", type="pil", height=120)
+                        gr.Markdown("### 🖼️ Generated SVG Candidates / 生成的SVG候选")
+                        image_gallery = gr.HTML(
+                            value='<div style="text-align:center;color:#999;padding:50px;background:#fafafa;border-radius:12px;">Generated SVGs will appear here / 生成的SVG将显示在这里</div>'
+                        )
+                        gr.Markdown("### 📝 SVG Code")
+                        image_svg_output = gr.Code(label="", language="html", lines=10, elem_classes=["code-output"])
+                image_generate_btn.click(
+                    fn=gradio_image_to_svg,
+                    inputs=[image_input, img_num_candidates, img_temperature, img_top_p,
+                           img_top_k, img_rep_penalty, img_replace_bg],
+                    outputs=[image_gallery, image_svg_output, image_processed],
+                    queue=True
+                )
+            # ==================== Text-to-SVG Tab ====================
+            with gr.TabItem("✏️ Text-to-SVG", id="text-tab"):
+                with gr.Row(equal_height=False):
+                    with gr.Column(scale=1, min_width=300):
+                        gr.Markdown("### 📝 Description / 描述")
+                        gr.HTML("""
+                        <div style="background: #fff5f5; padding: 10px; border-radius: 8px; border-left: 4px solid #dc3545; margin-bottom: 10px;">
+                            <strong style="color: #dc3545;">🔴 Generate 4-8 candidates and pick the best!</strong><br/>
+                            生成4-8个候选结果并选择最好的！
+                        </div>
+                        """)
+                        text_input = gr.Textbox(
+                            label="",
+                            placeholder="Describe your SVG with geometric shapes and colors...\n用几何形状和颜色描述您的SVG...\n\nExample: A black triangle pointing downward, centrally positioned on white background.",
+                            lines=5
+                        )
+                        with gr.Group(elem_classes=["settings-group"]):
+                            gr.Markdown("### ⚙️ Settings / 设置")
+                            text_num_candidates = gr.Slider(
+                                minimum=1, maximum=8, value=6, step=1,
+                                label="Number of Candidates / 候选数量",
+                                info="More = better chances! / 越多越好！"
+                            )
+                            with gr.Accordion("🔧 Advanced Parameters / 高级参数", open=False):
+                                text_temperature = gr.Slider(
+                                    minimum=0.1, maximum=1.0, value=0.5, step=0.05,
+                                    label="Temperature",
+                                    info="Icons: 0.3-0.5 | Complex: 0.5-0.7"
+                                )
+                                text_top_p = gr.Slider(
+                                    minimum=0.5, maximum=1.0, value=0.90, step=0.02,
+                                    label="Top-P"
+                                )
+                                text_top_k = gr.Slider(
+                                    minimum=10, maximum=100, value=60, step=5,
+                                    label="Top-K"
+                                )
+                                text_rep_penalty = gr.Slider(
+                                    minimum=1.0, maximum=1.3, value=1.03, step=0.01,
+                                    label="Repetition Penalty",
+                                    info="Increase if you see repetitive patterns"
+                                )
+                        text_generate_btn = gr.Button(
+                            "🚀 Generate SVG / 生成SVG",
+                            variant="primary",
+                            size="lg",
+                            elem_classes=["primary-btn"]
+                        )
+                        gr.Markdown("### 📝 Example Prompts (30)")
+                        gr.Examples(
+                            examples=[[text] for text in example_texts],
+                            inputs=[text_input],
+                            label=""
+                        )
+                    with gr.Column(scale=2, min_width=500):
+                        gr.Markdown("### 🖼️ Generated SVG Candidates / 生成的SVG候选")
+                        gr.HTML("""
+                        <div style="background: #d4edda; padding: 10px; border-radius: 8px; margin-bottom: 10px;">
+                            <strong>💡 Pick the best from multiple candidates! / 从多个候选中选择最好的！</strong>
+                        </div>
+                        """)
+                        text_gallery = gr.HTML(
+                            value='<div style="text-align:center;color:#999;padding:50px;background:#fafafa;border-radius:12px;">Generated SVGs will appear here / 生成的SVG将显示在这里</div>'
+                        )
+                        gr.Markdown("### 📝 SVG Code")
+                        text_svg_output = gr.Code(label="", language="html", lines=12, elem_classes=["code-output"])
+                text_generate_btn.click(
+                    fn=gradio_text_to_svg,
+                    inputs=[text_input, text_num_candidates, text_temperature, text_top_p,
+                           text_top_k, text_rep_penalty],
+                    outputs=[text_gallery, text_svg_output],
+                    queue=True
+                )
+        # Footer
+        gr.HTML("""
+        <div class="footer">
+            <p>Built with ❤️ using OmniSVG</p>
+            <p style="color: #dc3545; font-weight: 600;">🔴 Remember: Generate 4-8 candidates and pick the best! / 记住：生成4-8个候选并选择最好的！</p>
+        </div>
+        """)
+    return demo
 if __name__ == "__main__":
     # Script entry point: set up the environment, load the models once,
     # then build the Gradio interface and serve it.
     # NOTE(review): presumably disables tokenizers' internal parallelism to
     # avoid its fork warnings -- confirm against the tokenizers docs.
     os.environ["TOKENIZERS_PARALLELISM"] = "false"
     args = parse_args()
     # Startup banner; the 60-character rule is built once and reused.
     _rule = "=" * 60
     print(_rule)
     print("OmniSVG Generator - Gradio App")
     print(_rule)
     print(f"Model path: {args.model_path}")
     print(f"Weight path: {args.weight_path}")
     print(f"Device: {device}")
     print(_rule)
     # Models are loaded eagerly, before the UI is constructed.
     print("\nLoading models...")
     load_models(args.weight_path, args.model_path)
     print("Models loaded successfully!\n")
     demo = create_interface()
     # One request at a time with at most 20 waiting, matching the
     # "Queue System Active" notice rendered in the interface.
     demo.queue(default_concurrency_limit=1, max_size=20)
     # Network and debug options come straight from the CLI arguments.
     demo.launch(
         server_name=args.listen,
         server_port=args.port,
         share=args.share,
         debug=args.debug,
     )