Spaces:

OmniSVG
/

OmniSVG-3B

Running on Zero

App Files Community

OmniSVG committed on Dec 3, 2025

Commit

fd45ec3

verified ·

1 Parent(s): 062fa0c

Update app.py

Browse files

Files changed (1) hide show

app.py +385 -221

app.py CHANGED Viewed

@@ -12,7 +12,6 @@ import glob
 import numpy as np
 import time
 import threading
-import copy
 import spaces
 from huggingface_hub import hf_hub_download, snapshot_download
@@ -22,167 +21,131 @@ from transformers import AutoTokenizer, AutoProcessor
 from qwen_vl_utils import process_vision_info
 from tokenizer import SVGTokenizer
-# ============================================================
-# Configuration Loading with Variant Support
-# ============================================================
-def load_config(config_path: str, variant: str = None) -> dict:
-    """
-    Load config file and merge variant-specific settings.
-    Args:
-        config_path: Path to the config.yaml file
-        variant: Model variant ("8B" or "4B"). If None, uses default_variant from config.
-    Returns:
-        Merged configuration dictionary
-    """
-    with open(config_path, 'r') as f:
-        raw_config = yaml.safe_load(f)
-    # Determine which variant to use
-    if variant is None:
-        variant = raw_config.get('default_variant', '8B')
-    # Check if variant exists
-    variants = raw_config.get('variants', {})
-    if variant not in variants:
-        available = list(variants.keys())
-        raise ValueError(f"Unknown model variant '{variant}'. Available variants: {available}")
-    # Start with a copy of raw config (excluding 'variants' key)
-    merged_config = {k: v for k, v in raw_config.items() if k != 'variants'}
-    # Merge variant-specific settings
-    variant_config = variants[variant]
-    for key, value in variant_config.items():
-        if isinstance(value, dict) and key in merged_config and isinstance(merged_config[key], dict):
-            # Deep merge for nested dicts
-            merged_config[key] = {**merged_config.get(key, {}), **value}
-        else:
-            merged_config[key] = value
-    # Store the active variant name
-    merged_config['active_variant'] = variant
-    return merged_config
-def write_variant_config(config: dict, output_path: str):
-    """
-    Write a variant-specific config file for SVGTokenizer.
-    Args:
-        config: Merged configuration dictionary
-        output_path: Path to write the temporary config file
-    """
-    # Create a config without the 'variants' and 'active_variant' keys
-    clean_config = {k: v for k, v in config.items()
-                    if k not in ['variants', 'active_variant', 'default_variant']}
-    with open(output_path, 'w') as f:
-        yaml.safe_dump(clean_config, f, default_flow_style=False)
-# ============================================================
-# Global Variables
-# ============================================================
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 DTYPE = torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16
-# Global Models
 tokenizer = None
 processor = None
 sketch_decoder = None
 svg_tokenizer = None
-# Global Config (will be set after loading)
-config = None
-MODEL_VARIANT = None
 # Thread lock for model inference
 generation_lock = threading.Lock()
-# Constants (will be set from config)
 SYSTEM_PROMPT = """You are an expert SVG code generator.
 Generate precise, valid SVG path commands that accurately represent the described scene or object.
 Focus on capturing key shapes, spatial relationships, and visual composition."""
 SUPPORTED_FORMATS = ['.png', '.jpg', '.jpeg', '.webp', '.bmp', '.gif']
-def init_config_constants(cfg: dict):
-    """Initialize global constants from config."""
-    global TARGET_IMAGE_SIZE, RENDER_SIZE, BACKGROUND_THRESHOLD
-    global EMPTY_THRESHOLD_ILLUSTRATION, EMPTY_THRESHOLD_ICON
-    global EDGE_SAMPLE_RATIO, COLOR_SIMILARITY_THRESHOLD, MIN_EDGE_SAMPLES
-    global BLACK_COLOR_TOKEN, BOS_TOKEN_ID, EOS_TOKEN_ID, PAD_TOKEN_ID, MAX_LENGTH
-    global DEFAULT_QWEN_MODEL, DEFAULT_OMNISVG_MODEL
-    global TASK_CONFIGS, DEFAULT_NUM_CANDIDATES, MAX_NUM_CANDIDATES, EXTRA_CANDIDATES_BUFFER
-    global MIN_SVG_LENGTH
-    # Image processing settings
-    image_config = cfg.get('image', {})
-    TARGET_IMAGE_SIZE = image_config.get('target_size', 448)
-    RENDER_SIZE = image_config.get('render_size', 512)
-    BACKGROUND_THRESHOLD = image_config.get('background_threshold', 240)
-    EMPTY_THRESHOLD_ILLUSTRATION = image_config.get('empty_threshold_illustration', 250)
-    EMPTY_THRESHOLD_ICON = image_config.get('empty_threshold_icon', 252)
-    EDGE_SAMPLE_RATIO = image_config.get('edge_sample_ratio', 0.1)
-    COLOR_SIMILARITY_THRESHOLD = image_config.get('color_similarity_threshold', 30)
-    MIN_EDGE_SAMPLES = image_config.get('min_edge_samples', 10)
-    # Color settings
-    colors_config = cfg.get('colors', {})
-    BLACK_COLOR_TOKEN = colors_config.get('black_color_token',
-                                          colors_config.get('color_token_start', 40010) + 2)
-    # Model settings
-    model_config = cfg.get('model', {})
-    BOS_TOKEN_ID = model_config.get('bos_token_id', 196998)
-    EOS_TOKEN_ID = model_config.get('eos_token_id', 196999)
-    PAD_TOKEN_ID = model_config.get('pad_token_id', 151643)
-    MAX_LENGTH = model_config.get('max_length', 1536)
-    # HuggingFace model IDs
-    hf_config = cfg.get('huggingface', {})
-    DEFAULT_QWEN_MODEL = hf_config.get('qwen_model', "Qwen/Qwen2.5-VL-7B-Instruct")
-    DEFAULT_OMNISVG_MODEL = hf_config.get('omnisvg_model', "OmniSVG/OmniSVG1.1_8B")
-    # Task configurations
-    task_config = cfg.get('task_configs', {})
-    TASK_CONFIGS = {
-        "text-to-svg-icon": task_config.get('text_to_svg_icon', {
-            "default_temperature": 0.5,
-            "default_top_p": 0.88,
-            "default_top_k": 50,
-            "default_repetition_penalty": 1.05,
-        }),
-        "text-to-svg-illustration": task_config.get('text_to_svg_illustration', {
-            "default_temperature": 0.6,
-            "default_top_p": 0.90,
-            "default_top_k": 60,
-            "default_repetition_penalty": 1.03,
-        }),
-        "image-to-svg": task_config.get('image_to_svg', {
-            "default_temperature": 0.3,
-            "default_top_p": 0.90,
-            "default_top_k": 50,
-            "default_repetition_penalty": 1.05,
-        })
-    }
-    # Generation parameters
-    gen_config = cfg.get('generation', {})
-    DEFAULT_NUM_CANDIDATES = gen_config.get('default_num_candidates', 4)
-    MAX_NUM_CANDIDATES = gen_config.get('max_num_candidates', 8)
-    EXTRA_CANDIDATES_BUFFER = gen_config.get('extra_candidates_buffer', 4)
-    # Validation settings
-    validation_config = cfg.get('validation', {})
-    MIN_SVG_LENGTH = validation_config.get('min_svg_length', 20)
-# Custom CSS (same as before)
 CUSTOM_CSS = """
 /* Main container centering */
 .gradio-container {
@@ -209,14 +172,18 @@ CUSTOM_CSS = """
     opacity: 0.9;
     font-size: 1.1em;
 }
-/* Model badge styling */
-.model-badge {
-    display: inline-block;
-    background: rgba(255,255,255,0.2);
-    padding: 4px 12px;
-    border-radius: 20px;
-    font-size: 0.85em;
-    margin-top: 8px;
 }
 /* Tips section */
 .tips-box {
@@ -295,6 +262,17 @@ CUSTOM_CSS = """
 .green-box strong {
     color: #4caf50;
 }
 /* Tab styling */
 .tabs {
     border-radius: 12px !important;
@@ -363,7 +341,7 @@ CUSTOM_CSS = """
 }
 """
-# Enhanced Tips HTML (same as before - abbreviated for brevity)
 TIPS_HTML = """
 <div class="tips-box">
     <h3>Prompting Guide & Best Practices</h3>
@@ -390,6 +368,15 @@ TIPS_HTML = """
         </ul>
     </div>
     <!-- Parameter Tuning Tips -->
     <div class="orange-box">
         <strong>Parameter Tuning Guide</strong>
@@ -455,15 +442,32 @@ TIPS_HTML = """
             <div class="example-prompt">
                 "A simple person: round beige head, rectangular blue shirt body, two dark gray rectangular legs. Standing pose, arms at sides, flat colors."
             </div>
             <p class="red-tip">Keep poses SIMPLE: standing, sitting, waving. Avoid complex actions!</p>
         </div>
         <div class="tip-category">
             <h4>Landscapes & Scenes</h4>
             <p>Layer elements from background to foreground. Specify color for EACH layer.</p>
             <div class="example-prompt">
                 "Layered landscape: light blue sky at top, gray triangular mountains in middle, dark green triangular pine trees at bottom. Flat colors, simple shapes."
             </div>
             <p class="red-tip">Use geometric shapes for nature: triangular trees, wavy water, semicircle sun!</p>
         </div>
@@ -473,10 +477,64 @@ TIPS_HTML = """
             <div class="example-prompt">
                 "Cute cat: orange round head with two triangular ears, oval orange body, curved tail. Simple cartoon style with black outlines, sitting pose."
             </div>
         </div>
     </div>
     <!-- Quick Troubleshooting -->
     <div class="green-box" style="margin-top: 15px;">
         <strong>Quick Troubleshooting</strong>
@@ -489,6 +547,17 @@ TIPS_HTML = """
             <li><strong>Inconsistent?</strong> <span class="red-tip">Generate MORE candidates (6-8) and pick the best!</span></li>
         </ul>
     </div>
 </div>
 """
@@ -513,10 +582,9 @@ def parse_args():
     parser.add_argument('--port', type=int, default=7860)
     parser.add_argument('--share', action='store_true')
     parser.add_argument('--debug', action='store_true')
-    parser.add_argument('--model_size', type=str, default=None, choices=['8B', '4B'],
-                        help='Model size variant to use (8B or 4B). Overrides config default.')
-    parser.add_argument('--config', type=str, default='./config.yaml',
-                        help='Path to config file (default: ./config.yaml)')
     parser.add_argument('--weight_path', type=str, default=None,
                         help='HuggingFace repo ID or local path for OmniSVG weights (overrides config)')
     parser.add_argument('--model_path', type=str, default=None,
@@ -527,6 +595,13 @@ def parse_args():
 def download_model_weights(repo_id: str, filename: str = "pytorch_model.bin") -> str:
     """
     Download model weights from Hugging Face Hub.
     """
     print(f"Downloading {filename} from {repo_id}...")
     try:
@@ -555,20 +630,29 @@ def is_local_path(path: str) -> bool:
     return False
-def load_models(weight_path: str, model_path: str, variant_config_path: str):
     """
-    Load all models with support for both local paths and HuggingFace Hub.
     Args:
-        weight_path: Local path or HuggingFace repo ID for OmniSVG weights
-        model_path: Local path or HuggingFace repo ID for Qwen model
-        variant_config_path: Path to the variant-specific config file for SVGTokenizer
     """
-    global tokenizer, processor, sketch_decoder, svg_tokenizer
-    print(f"Loading Qwen model from: {model_path}")
-    print(f"Loading OmniSVG weights from: {weight_path}")
-    print(f"Using precision: {DTYPE}")
     # Load Qwen tokenizer and processor
     print("\n[1/3] Loading tokenizer and processor...")
@@ -585,12 +669,14 @@ def load_models(weight_path: str, model_path: str, variant_config_path: str):
     processor.tokenizer.padding_side = "left"
     print("Tokenizer and processor loaded successfully!")
-    # Initialize sketch decoder
     print("\n[2/3] Initializing SketchDecoder...")
     sketch_decoder = SketchDecoder(
         pix_len=MAX_LENGTH,
         text_len=config.get('text', {}).get('max_length', 200),
-        model_path=model_path,
         torch_dtype=DTYPE
     )
@@ -618,14 +704,46 @@ def load_models(weight_path: str, model_path: str, variant_config_path: str):
     sketch_decoder = sketch_decoder.to(device).eval()
-    # Initialize SVG tokenizer with variant-specific config
-    svg_tokenizer = SVGTokenizer(variant_config_path)
     print("\n" + "="*60)
-    print("All models loaded successfully!")
     print("="*60 + "\n")
 def detect_text_subtype(text_prompt):
     """Auto-detect text prompt subtype"""
     text_lower = text_prompt.lower()
@@ -652,7 +770,9 @@ def detect_text_subtype(text_prompt):
 def detect_and_replace_background(image, threshold=None, edge_sample_ratio=None):
-    """Detect if image has non-white background and optionally replace it."""
     if threshold is None:
         threshold = BACKGROUND_THRESHOLD
     if edge_sample_ratio is None:
@@ -686,6 +806,11 @@ def detect_and_replace_background(image, threshold=None, edge_sample_ratio=None)
             return image, False
     if len(img_array.shape) == 3 and img_array.shape[2] >= 3:
         edge_colors = []
         for i in range(w):
             edge_colors.append(tuple(img_array[0, i, :3]))
@@ -713,7 +838,9 @@ def detect_and_replace_background(image, threshold=None, edge_sample_ratio=None)
 def preprocess_image_for_svg(image, replace_background=True, target_size=None):
-    """Preprocess image for SVG generation."""
     if target_size is None:
         target_size = TARGET_IMAGE_SIZE
@@ -889,7 +1016,8 @@ def generate_candidates(inputs, task_type, subtype, temperature, top_p, top_k, r
     all_candidates = []
-    gen_config = {
         'do_sample': True,
         'temperature': temperature,
         'top_p': top_p,
@@ -918,7 +1046,7 @@ def generate_candidates(inputs, task_type, subtype, temperature, top_p, top_k, r
                     max_new_tokens=max_length,
                     num_return_sequences=actual_samples,
                     use_cache=True,
-                    **gen_config
                 )
                 input_len = input_ids.shape[1]
@@ -996,7 +1124,7 @@ def gradio_text_to_svg(text_description, num_candidates, temperature, top_p, top
         return '<div style="text-align:center;color:#999;padding:50px;">Please enter a description</div>', ""
     print("\n" + "="*60)
-    print(f"[TASK] text-to-svg ({MODEL_VARIANT})")
     print(f"[INPUT] {text_description[:100]}{'...' if len(text_description) > 100 else ''}")
     print(f"[PARAMS] candidates={num_candidates}, temp={temperature}, top_p={top_p}, top_k={top_k}, rep_penalty={repetition_penalty}")
     print("="*60)
@@ -1062,7 +1190,7 @@ def gradio_image_to_svg(image, num_candidates, temperature, top_p, top_k, repeti
         )
     print("\n" + "="*60)
-    print(f"[TASK] image-to-svg ({MODEL_VARIANT})")
     print(f"[INPUT] Image size: {image.size if hasattr(image, 'size') else 'unknown'}, mode: {image.mode if hasattr(image, 'mode') else 'unknown'}")
     print(f"[PARAMS] candidates={num_candidates}, temp={temperature}, top_p={top_p}, top_k={top_k}, rep_penalty={repetition_penalty}, replace_bg={replace_background}")
     print("="*60)
@@ -1149,43 +1277,101 @@ def get_example_images():
 def create_interface():
     """Create Gradio interface"""
-    # Example prompts
     example_texts = [
         "A black triangle pointing downward, centrally positioned.",
         "A red heart shape with smooth curved edges, centered.",
         "A yellow star with five sharp points, simple geometric design, flat color.",
         "A blue arrow pointing to the right, thick solid shape, centered.",
         "A green circle with a white checkmark inside, centered.",
         "A black plus sign with equal length arms, thick lines, centered.",
         "A simple person standing: round beige head, rectangular blue shirt body, two dark gray rectangular legs, arms at sides. Flat colors.",
         "A girl with long black hair, wearing pink dress with triangular skirt, small circular face with dot eyes and curved smile. Simple cartoon style.",
         "Circular avatar: person with short black hair, round face with two dot eyes and small curved smile, wearing blue collar shirt. Minimal style, centered in circle.",
         "Layered mountain landscape: light blue sky at top, gray triangular snow-capped mountains in middle, dark green triangular pine trees at bottom. Flat colors.",
         "Sunset beach scene: orange gradient sky at top, yellow semicircle sun on horizon, dark blue wavy ocean, tan beach strip at bottom. Simple shapes.",
         "Cute orange cat sitting: round head with two triangular ears, oval body, curved tail. Black outline cartoon style, facing forward.",
         "Simple house icon: red triangular roof, beige rectangular walls, brown door in center, two blue square windows, green ground at bottom.",
         "Coffee mug: brown cylindrical cup with curved handle on right, three wavy steam lines rising from top. Flat style.",
-        "Red fox logo: triangular orange face with pointed ears, white chest marking, bushy tail. Minimalist style, facing right, centered.",
     ]
     example_images = get_example_images()
-    # Dynamic header with model info
-    header_html = f"""
-    <div class="header-container">
-        <h1>OmniSVG Generator</h1>
-        <p>Transform images and text descriptions into scalable vector graphics</p>
-        <div class="model-badge">Model: OmniSVG {MODEL_VARIANT} | Qwen: {DEFAULT_QWEN_MODEL.split('/')[-1]}</div>
-    </div>
-    """
-    with gr.Blocks(title=f"OmniSVG Generator ({MODEL_VARIANT})") as demo:
         # Header
-        gr.HTML(header_html)
         # Queue status
         gr.HTML("""
-        <div style="background: #e7f3ff; border: 1px solid #b3d7ff; border-radius: 8px; padding: 12px 15px; margin-bottom: 15px;">
             <span style="font-size: 1.5em;">ℹ️</span>
             <strong>Queue System Active</strong> - Requests processed one at a time. Please wait patiently if busy.
         </div>
@@ -1340,7 +1526,7 @@ def create_interface():
                             elem_classes=["primary-btn"]
                         )
-                        gr.Markdown("### Example Prompts")
                         gr.Examples(
                             examples=[[text] for text in example_texts],
                             inputs=[text_input],
@@ -1372,7 +1558,7 @@ def create_interface():
         # Footer
         gr.HTML(f"""
         <div class="footer">
-            <p>Built with OmniSVG {MODEL_VARIANT}</p>
             <p style="color: #dc3545; font-weight: 600;">Remember: Generate 4-8 candidates and pick the best!</p>
         </div>
         """)
@@ -1385,31 +1571,20 @@ if __name__ == "__main__":
     args = parse_args()
     print("="*60)
     print("OmniSVG Demo Page - Gradio App")
     print("="*60)
-    # Load config with variant support
-    print(f"\nLoading config from: {args.config}")
-    config = load_config(args.config, variant=args.model_size)
-    MODEL_VARIANT = config['active_variant']
-    # Initialize constants from config
-    init_config_constants(config)
-    # Override model paths if provided via command line
-    weight_path = args.weight_path if args.weight_path else DEFAULT_OMNISVG_MODEL
-    model_path = args.model_path if args.model_path else DEFAULT_QWEN_MODEL
-    print(f"\n[CONFIG] Active variant: {MODEL_VARIANT}")
-    print(f"[CONFIG] Qwen model: {model_path}")
-    print(f"[CONFIG] OmniSVG weights: {weight_path}")
-    print(f"[CONFIG] Device: {device}")
-    print(f"[CONFIG] Precision: {DTYPE}")
     print("="*60)
     # Print loaded config values
-    print("\n[CONFIG] Loaded settings:")
     print(f"  - TARGET_IMAGE_SIZE: {TARGET_IMAGE_SIZE}")
     print(f"  - RENDER_SIZE: {RENDER_SIZE}")
     print(f"  - BLACK_COLOR_TOKEN: {BLACK_COLOR_TOKEN}")
@@ -1417,21 +1592,10 @@ if __name__ == "__main__":
     print(f"  - BOS_TOKEN_ID: {BOS_TOKEN_ID}")
     print(f"  - EOS_TOKEN_ID: {EOS_TOKEN_ID}")
     print(f"  - PAD_TOKEN_ID: {PAD_TOKEN_ID}")
-    # Print variant-specific token offsets
-    print(f"\n[CONFIG] Variant-specific ({MODEL_VARIANT}):")
-    print(f"  - base_offset: {config.get('tokens', {}).get('base_offset', 'N/A')}")
-    print(f"  - color_start_offset: {config.get('colors', {}).get('color_start_offset', 'N/A')}")
-    print(f"  - color_end_offset: {config.get('colors', {}).get('color_end_offset', 'N/A')}")
     print("="*60)
-    # Write variant-specific config for SVGTokenizer
-    variant_config_path = f'./config_{MODEL_VARIANT.lower()}_runtime.yaml'
-    write_variant_config(config, variant_config_path)
-    print(f"\n[CONFIG] Written variant config to: {variant_config_path}")
     print("\nLoading models (may download from HuggingFace Hub if needed)...")
-    load_models(weight_path, model_path, variant_config_path)
     print("Models loaded successfully!\n")
     demo = create_interface()

 import numpy as np
 import time
 import threading
 import spaces
 from huggingface_hub import hf_hub_download, snapshot_download
 from qwen_vl_utils import process_vision_info
 from tokenizer import SVGTokenizer
+# Load config
+CONFIG_PATH = './config.yaml'
+with open(CONFIG_PATH, 'r') as f:
+    config = yaml.safe_load(f)
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 DTYPE = torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16
+# Global Models (will be loaded based on selected model size)
 tokenizer = None
 processor = None
 sketch_decoder = None
 svg_tokenizer = None
+current_model_size = None  # Track which model is currently loaded
 # Thread lock for model inference
 generation_lock = threading.Lock()
+model_loading_lock = threading.Lock()
+# Constants: hard-coded prompt and supported formats, then model sizes read from config
 SYSTEM_PROMPT = """You are an expert SVG code generator.
 Generate precise, valid SVG path commands that accurately represent the described scene or object.
 Focus on capturing key shapes, spatial relationships, and visual composition."""
 SUPPORTED_FORMATS = ['.png', '.jpg', '.jpeg', '.webp', '.bmp', '.gif']
+AVAILABLE_MODEL_SIZES = list(config.get('models', {}).keys())
+DEFAULT_MODEL_SIZE = config.get('default_model_size', '8B')
+# ============================================================
+# Helper function to get config value (model-specific or shared)
+# ============================================================
+def get_config_value(model_size, *keys):
+    """Get config value with model-specific override support."""
+    # Try model-specific config first
+    model_cfg = config.get('models', {}).get(model_size, {})
+    value = model_cfg
+    for key in keys:
+        if isinstance(value, dict) and key in value:
+            value = value[key]
+        else:
+            value = None
+            break
+    # Fallback to shared config if not found
+    if value is None:
+        value = config
+        for key in keys:
+            if isinstance(value, dict) and key in value:
+                value = value[key]
+            else:
+                return None
+    return value
+# ============================================================
+# Image processing settings from config (shared)
+# ============================================================
+image_config = config.get('image', {})
+TARGET_IMAGE_SIZE = image_config.get('target_size', 448)
+RENDER_SIZE = image_config.get('render_size', 512)
+BACKGROUND_THRESHOLD = image_config.get('background_threshold', 240)
+EMPTY_THRESHOLD_ILLUSTRATION = image_config.get('empty_threshold_illustration', 250)
+EMPTY_THRESHOLD_ICON = image_config.get('empty_threshold_icon', 252)
+EDGE_SAMPLE_RATIO = image_config.get('edge_sample_ratio', 0.1)
+COLOR_SIMILARITY_THRESHOLD = image_config.get('color_similarity_threshold', 30)
+MIN_EDGE_SAMPLES = image_config.get('min_edge_samples', 10)
+# ============================================================
+# Color settings from config (shared)
+# ============================================================
+colors_config = config.get('colors', {})
+BLACK_COLOR_TOKEN = colors_config.get('black_color_token',
+                                       colors_config.get('color_token_start', 40010) + 2)
+# ============================================================
+# Model settings from config (shared)
+# ============================================================
+model_config = config.get('model', {})
+BOS_TOKEN_ID = model_config.get('bos_token_id', 196998)
+EOS_TOKEN_ID = model_config.get('eos_token_id', 196999)
+PAD_TOKEN_ID = model_config.get('pad_token_id', 151643)
+MAX_LENGTH = model_config.get('max_length', 1536)
+# ============================================================
+# Task configurations with defaults from config (shared)
+# ============================================================
+task_config = config.get('task_configs', {})
+TASK_CONFIGS = {
+    "text-to-svg-icon": task_config.get('text_to_svg_icon', {
+        "default_temperature": 0.5,
+        "default_top_p": 0.88,
+        "default_top_k": 50,
+        "default_repetition_penalty": 1.05,
+    }),
+    "text-to-svg-illustration": task_config.get('text_to_svg_illustration', {
+        "default_temperature": 0.6,
+        "default_top_p": 0.90,
+        "default_top_k": 60,
+        "default_repetition_penalty": 1.03,
+    }),
+    "image-to-svg": task_config.get('image_to_svg', {
+        "default_temperature": 0.3,
+        "default_top_p": 0.90,
+        "default_top_k": 50,
+        "default_repetition_penalty": 1.05,
+    })
+}
+# ============================================================
+# Generation parameters from config (shared)
+# ============================================================
+gen_config = config.get('generation', {})
+DEFAULT_NUM_CANDIDATES = gen_config.get('default_num_candidates', 4)
+MAX_NUM_CANDIDATES = gen_config.get('max_num_candidates', 8)
+EXTRA_CANDIDATES_BUFFER = gen_config.get('extra_candidates_buffer', 4)
+# ============================================================
+# Validation settings from config (shared)
+# ============================================================
+validation_config = config.get('validation', {})
+MIN_SVG_LENGTH = validation_config.get('min_svg_length', 20)
+# Custom CSS
 CUSTOM_CSS = """
 /* Main container centering */
 .gradio-container {
     opacity: 0.9;
     font-size: 1.1em;
 }
+/* Model selector styling */
+.model-selector {
+    background: #f0f4f8;
+    border: 2px solid #667eea;
+    border-radius: 12px;
+    padding: 15px;
+    margin-bottom: 20px;
+}
+.model-selector-title {
+    font-weight: 700;
+    color: #667eea;
+    margin-bottom: 10px;
 }
 /* Tips section */
 .tips-box {
 .green-box strong {
     color: #4caf50;
 }
+.blue-box {
+    background: #e3f2fd;
+    border: 1px solid #90caf9;
+    border-left: 4px solid #2196f3;
+    padding: 12px;
+    border-radius: 8px;
+    margin: 10px 0;
+}
+.blue-box strong {
+    color: #2196f3;
+}
 /* Tab styling */
 .tabs {
     border-radius: 12px !important;
 }
 """
+# Enhanced Tips HTML
 TIPS_HTML = """
 <div class="tips-box">
     <h3>Prompting Guide & Best Practices</h3>
         </ul>
     </div>
+    <!-- Model Selection Tips -->
+    <div class="blue-box">
+        <strong>Model Selection Guide</strong>
+        <ul style="margin: 8px 0 0 0; padding-left: 20px;">
+            <li><strong>8B Model:</strong> Higher quality, more details, better for complex illustrations. Requires more VRAM (~16GB+).</li>
+            <li><strong>4B Model:</strong> Faster, less VRAM required (~8GB+). Good for simple icons and basic shapes.</li>
+        </ul>
+    </div>
     <!-- Parameter Tuning Tips -->
     <div class="orange-box">
         <strong>Parameter Tuning Guide</strong>
             <div class="example-prompt">
                 "A simple person: round beige head, rectangular blue shirt body, two dark gray rectangular legs. Standing pose, arms at sides, flat colors."
             </div>
+            <div class="example-prompt">
+                "A girl with long black hair, pink dress with triangular skirt shape, small circular face with dot eyes and curved smile. Simple cartoon style."
+            </div>
             <p class="red-tip">Keep poses SIMPLE: standing, sitting, waving. Avoid complex actions!</p>
         </div>
+        <div class="tip-category">
+            <h4>Avatars & Portraits</h4>
+            <p>Use circular frame, focus on face and upper body only.</p>
+            <div class="example-prompt">
+                "Circular avatar: person with short black hair, round face with two dot eyes and small curved smile, wearing blue collar shirt. Minimal style."
+            </div>
+            <div class="example-prompt">
+                "Profile avatar silhouette: black side view of head with short hair, facing right. Simple solid shape."
+            </div>
+        </div>
         <div class="tip-category">
             <h4>Landscapes & Scenes</h4>
             <p>Layer elements from background to foreground. Specify color for EACH layer.</p>
             <div class="example-prompt">
                 "Layered landscape: light blue sky at top, gray triangular mountains in middle, dark green triangular pine trees at bottom. Flat colors, simple shapes."
             </div>
+            <div class="example-prompt">
+                "Sunset beach: orange gradient sky at top, yellow semicircle sun on horizon, dark blue wavy ocean below, tan beach at bottom."
+            </div>
             <p class="red-tip">Use geometric shapes for nature: triangular trees, wavy water, semicircle sun!</p>
         </div>
             <div class="example-prompt">
                 "Cute cat: orange round head with two triangular ears, oval orange body, curved tail. Simple cartoon style with black outlines, sitting pose."
             </div>
+            <div class="example-prompt">
+                "Simple black bird: oval body, small round head, pointed triangular beak facing right, triangular tail, two stick legs. Silhouette style."
+            </div>
+        </div>
+        <div class="tip-category">
+            <h4>Buildings & Objects</h4>
+            <p>Use basic shapes: rectangles for walls, triangles for roofs, squares for windows.</p>
+            <div class="example-prompt">
+                "Simple house: red triangular roof on top, beige rectangular wall, brown rectangular door in center, two small blue square windows. Green ground at bottom."
+            </div>
+            <div class="example-prompt">
+                "Coffee mug: brown cylindrical cup shape with curved handle on right side, three wavy steam lines rising from top. Simple flat style."
+            </div>
         </div>
     </div>
+    <!-- Extended Examples Section -->
+    <div style="margin-top: 20px; padding: 15px; background: #f0f7ff; border-radius: 10px; border: 1px solid #cce5ff;">
+        <h4 style="margin-top: 0; color: #0066cc;">More Complex Examples (Generate 6-8 candidates!)</h4>
+        <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(280px, 1fr)); gap: 12px; margin-top: 15px;">
+            <div class="example-prompt">
+                <strong>Business Avatar:</strong><br/>
+                "Circular professional avatar: man with short black hair, neutral skin tone round face, wearing dark navy suit with white shirt collar visible. Clean minimal style, centered in circle."
+            </div>
+            <div class="example-prompt">
+                <strong>Female Portrait:</strong><br/>
+                "Simple female face: oval face shape, long brown wavy hair on sides, two dot eyes, small nose, curved smile lips. Pink blush on cheeks. Cartoon portrait style."
+            </div>
+            <div class="example-prompt">
+                <strong>Child Character:</strong><br/>
+                "Cute child standing: large round head with short brown hair, big circular eyes with white highlights, small body in red t-shirt and blue shorts, simple stick arms and legs. Cheerful cartoon style."
+            </div>
+            <div class="example-prompt">
+                <strong>Active Pose:</strong><br/>
+                "Person walking: side view, circular head, rectangular torso in green jacket, legs in walking position (one forward, one back). Simple geometric style, moving right."
+            </div>
+            <div class="example-prompt">
+                <strong>Forest Scene:</strong><br/>
+                "Simple forest: light blue sky, row of 5 dark green triangular pine trees of varying heights, brown rectangular trunks, light green grass strip at bottom. Layered flat design."
+            </div>
+            <div class="example-prompt">
+                <strong>Ocean View:</strong><br/>
+                "Minimalist ocean: gradient blue sky at top, three horizontal wavy lines in dark blue for ocean, small white sailboat with triangular sail in center. Clean vector style."
+            </div>
+            <div class="example-prompt">
+                <strong>City Skyline:</strong><br/>
+                "Simple city skyline: orange sunset sky gradient, row of black rectangular building silhouettes of different heights, some with small yellow square windows. Minimalist style."
+            </div>
+            <div class="example-prompt">
+                <strong>Dog Character:</strong><br/>
+                "Friendly cartoon dog: brown oval body, round head with floppy ears, black dot nose, curved tail pointing up, four short legs. Sitting pose facing forward."
+            </div>
+        </div>
+    </div>
     <!-- Quick Troubleshooting -->
     <div class="green-box" style="margin-top: 15px;">
         <strong>Quick Troubleshooting</strong>
             <li><strong>Inconsistent?</strong> <span class="red-tip">Generate MORE candidates (6-8) and pick the best!</span></li>
         </ul>
     </div>
+    <!-- Prompt Template -->
+    <div style="margin-top: 15px; padding: 12px; background: #e8f5e9; border-radius: 8px; border-left: 4px solid #4caf50;">
+        <strong>Recommended Prompt Structure</strong>
+        <div style="background: white; padding: 10px; border-radius: 6px; margin-top: 8px; font-family: monospace; font-size: 0.9em;">
+            [Subject] + [Shape descriptions with colors] + [Position/orientation] + [Style]
+        </div>
+        <p style="margin: 10px 0 0 0; color: #2e7d32; font-size: 0.95em;">
+            Example: "A fox logo: triangular orange head, pointed ears, white chest marking, facing right. Minimalist flat style, centered."
+        </p>
+    </div>
 </div>
 """
     parser.add_argument('--port', type=int, default=7860)
     parser.add_argument('--share', action='store_true')
     parser.add_argument('--debug', action='store_true')
+    parser.add_argument('--model_size', type=str, default=None,
+                        choices=AVAILABLE_MODEL_SIZES,
+                        help=f'Model size to load at startup (default: {DEFAULT_MODEL_SIZE}). Can be changed in UI.')
     parser.add_argument('--weight_path', type=str, default=None,
                         help='HuggingFace repo ID or local path for OmniSVG weights (overrides config)')
     parser.add_argument('--model_path', type=str, default=None,
 def download_model_weights(repo_id: str, filename: str = "pytorch_model.bin") -> str:
     """
     Download model weights from Hugging Face Hub.
+    Args:
+        repo_id: Hugging Face repository ID (e.g., 'OmniSVG/OmniSVG1.1_8B')
+        filename: Name of the weights file to download
+    Returns:
+        Local path to the downloaded file
     """
     print(f"Downloading {filename} from {repo_id}...")
     try:
     return False
+def load_models(model_size: str, weight_path: str = None, model_path: str = None):
     """
+    Load all models for a specific model size.
     Args:
+        model_size: Model size ("8B" or "4B")
+        weight_path: Local path or HuggingFace repo ID for OmniSVG weights (optional, uses config if None)
+        model_path: Local path or HuggingFace repo ID for Qwen model (optional, uses config if None)
     """
+    global tokenizer, processor, sketch_decoder, svg_tokenizer, current_model_size
+    # Use config values if not provided
+    if weight_path is None:
+        weight_path = get_config_value(model_size, 'huggingface', 'omnisvg_model')
+    if model_path is None:
+        model_path = get_config_value(model_size, 'huggingface', 'qwen_model')
+    print(f"\n{'='*60}")
+    print(f"Loading {model_size} Model")
+    print(f"{'='*60}")
+    print(f"Qwen model: {model_path}")
+    print(f"OmniSVG weights: {weight_path}")
+    print(f"Precision: {DTYPE}")
     # Load Qwen tokenizer and processor
     print("\n[1/3] Loading tokenizer and processor...")
     processor.tokenizer.padding_side = "left"
     print("Tokenizer and processor loaded successfully!")
+    # Initialize sketch decoder with model_size
     print("\n[2/3] Initializing SketchDecoder...")
     sketch_decoder = SketchDecoder(
+        config_path=CONFIG_PATH,
+        model_path=model_path,
+        model_size=model_size,
         pix_len=MAX_LENGTH,
         text_len=config.get('text', {}).get('max_length', 200),
         torch_dtype=DTYPE
     )
     sketch_decoder = sketch_decoder.to(device).eval()
+    # Initialize SVG tokenizer with model_size
+    svg_tokenizer = SVGTokenizer(CONFIG_PATH, model_size=model_size)
+    current_model_size = model_size
     print("\n" + "="*60)
+    print(f"All {model_size} models loaded successfully!")
     print("="*60 + "\n")
+def switch_model(new_model_size: str):
+    """
+    Switch to a different model size.
+    Args:
+        new_model_size: Target model size ("8B" or "4B")
+    Returns:
+        Status message
+    """
+    global current_model_size
+    if new_model_size == current_model_size:
+        return f"✅ Already using {new_model_size} model"
+    with model_loading_lock:
+        # Clear memory
+        gc.collect()
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
+        try:
+            load_models(new_model_size)
+            return f"✅ Successfully switched to {new_model_size} model"
+        except Exception as e:
+            error_msg = f"❌ Failed to switch to {new_model_size}: {str(e)}"
+            print(error_msg)
+            return error_msg
 def detect_text_subtype(text_prompt):
     """Auto-detect text prompt subtype"""
     text_lower = text_prompt.lower()
 def detect_and_replace_background(image, threshold=None, edge_sample_ratio=None):
+    """
+    Detect if image has non-white background and optionally replace it.
+    """
     if threshold is None:
         threshold = BACKGROUND_THRESHOLD
     if edge_sample_ratio is None:
             return image, False
     if len(img_array.shape) == 3 and img_array.shape[2] >= 3:
+        if img_array.shape[2] == 4:
+            gray = np.mean(img_array[:, :, :3], axis=2)
+        else:
+            gray = np.mean(img_array, axis=2)
         edge_colors = []
         for i in range(w):
             edge_colors.append(tuple(img_array[0, i, :3]))
 def preprocess_image_for_svg(image, replace_background=True, target_size=None):
+    """
+    Preprocess image for SVG generation.
+    """
     if target_size is None:
         target_size = TARGET_IMAGE_SIZE
     all_candidates = []
+    # Generation config with user parameters
+    gen_cfg = {
         'do_sample': True,
         'temperature': temperature,
         'top_p': top_p,
                     max_new_tokens=max_length,
                     num_return_sequences=actual_samples,
                     use_cache=True,
+                    **gen_cfg
                 )
                 input_len = input_ids.shape[1]
         return '<div style="text-align:center;color:#999;padding:50px;">Please enter a description</div>', ""
     print("\n" + "="*60)
+    print(f"[TASK] text-to-svg (Model: {current_model_size})")
     print(f"[INPUT] {text_description[:100]}{'...' if len(text_description) > 100 else ''}")
     print(f"[PARAMS] candidates={num_candidates}, temp={temperature}, top_p={top_p}, top_k={top_k}, rep_penalty={repetition_penalty}")
     print("="*60)
         )
     print("\n" + "="*60)
+    print(f"[TASK] image-to-svg (Model: {current_model_size})")
     print(f"[INPUT] Image size: {image.size if hasattr(image, 'size') else 'unknown'}, mode: {image.mode if hasattr(image, 'mode') else 'unknown'}")
     print(f"[PARAMS] candidates={num_candidates}, temp={temperature}, top_p={top_p}, top_k={top_k}, rep_penalty={repetition_penalty}, replace_bg={replace_background}")
     print("="*60)
 def create_interface():
     """Create Gradio interface"""
+    # 30 Example prompts covering various categories
     example_texts = [
+        # === Simple Icons (1-6) ===
         "A black triangle pointing downward, centrally positioned.",
         "A red heart shape with smooth curved edges, centered.",
         "A yellow star with five sharp points, simple geometric design, flat color.",
         "A blue arrow pointing to the right, thick solid shape, centered.",
         "A green circle with a white checkmark inside, centered.",
         "A black plus sign with equal length arms, thick lines, centered.",
+        # === Characters & People (7-12) ===
         "A simple person standing: round beige head, rectangular blue shirt body, two dark gray rectangular legs, arms at sides. Flat colors.",
         "A girl with long black hair, wearing pink dress with triangular skirt, small circular face with dot eyes and curved smile. Simple cartoon style.",
+        "A child waving: large round head with brown messy hair, big circular eyes, small body in red t-shirt and blue shorts, one arm raised. Cheerful cartoon style.",
+        "A person sitting on chair: side view, round head, rectangular torso in green sweater, bent legs on simple chair shape. Relaxed pose.",
+        "A running person: side view silhouette in black, dynamic pose with one leg forward, arms pumping. Motion style.",
+        # === Avatars & Portraits (13-17) ===
         "Circular avatar: person with short black hair, round face with two dot eyes and small curved smile, wearing blue collar shirt. Minimal style, centered in circle.",
+        "Female avatar: oval face with long wavy brown hair, simple eyes, pink lips, wearing v-neck purple top. Soft cartoon style in circular frame.",
+        "Profile silhouette avatar: black side view of head with short hair and glasses outline, facing right. Simple solid shape.",
+        "Cute cartoon avatar: round face with big sparkly eyes, rosy cheeks, short bob haircut in orange. Kawaii style, circular frame.",
+        "Professional headshot avatar: person with neat hair, neutral expression, wearing suit collar. Corporate minimal style, circular frame.",
+        # === Landscapes & Scenes (18-23) ===
         "Layered mountain landscape: light blue sky at top, gray triangular snow-capped mountains in middle, dark green triangular pine trees at bottom. Flat colors.",
         "Sunset beach scene: orange gradient sky at top, yellow semicircle sun on horizon, dark blue wavy ocean, tan beach strip at bottom. Simple shapes.",
+        "Forest scene: light blue sky, row of 5 dark green triangular pine trees of varying heights on brown trunks, light green grass at bottom.",
+        "City skyline at dusk: purple-orange gradient sky, row of black rectangular building silhouettes of different heights, some with yellow window squares.",
+        "Desert landscape: light orange sky with white circle sun, tan sand dunes as curved shapes, one green cactus with arms on the right side.",
+        "Countryside scene: blue sky with white fluffy clouds, green rolling hills, small red barn with white door in the center, yellow hay bales.",
+        # === Animals (24-27) ===
         "Cute orange cat sitting: round head with two triangular ears, oval body, curved tail. Black outline cartoon style, facing forward.",
+        "Simple black bird: oval body, round head, pointed triangular beak facing right, triangular tail, two stick legs. Silhouette style.",
+        "Friendly cartoon dog: brown oval body, round head with floppy ears, black dot nose, wagging curved tail, four short legs. Sitting pose.",
+        "Red fox logo: triangular orange face with pointed ears, white chest marking, bushy tail. Minimalist style, facing right, centered.",
+        # === Objects & Misc (28-30) ===
         "Simple house icon: red triangular roof, beige rectangular walls, brown door in center, two blue square windows, green ground at bottom.",
         "Coffee mug: brown cylindrical cup with curved handle on right, three wavy steam lines rising from top. Flat style.",
+        "Open book: two rectangular white pages spread open, black text lines on each page, brown spine in center. Simple top-down view."
     ]
     example_images = get_example_images()
+    with gr.Blocks(title="OmniSVG Generator", css=CUSTOM_CSS) as demo:
         # Header
+        gr.HTML("""
+        <div class="header-container">
+            <h1>OmniSVG Generator</h1>
+            <p>Transform images and text descriptions into scalable vector graphics</p>
+        </div>
+        """)
+        # Model Selection Section
+        with gr.Row():
+            with gr.Column():
+                gr.HTML("""
+                <div class="blue-box">
+                    <strong>🔧 Model Selection</strong>
+                    <p style="margin: 5px 0 0 0; font-size: 0.9em;">
+                        Choose between <b>8B</b> (higher quality, more VRAM) or <b>4B</b> (faster, less VRAM).
+                    </p>
+                </div>
+                """)
+        with gr.Row():
+            model_selector = gr.Dropdown(
+                choices=AVAILABLE_MODEL_SIZES,
+                value=DEFAULT_MODEL_SIZE,
+                label="Model Size",
+                info="8B: ~16GB VRAM, higher quality | 4B: ~8GB VRAM, faster",
+                interactive=True,
+                scale=1
+            )
+            model_status = gr.Textbox(
+                label="Model Status",
+                value=f"✅ Ready: {DEFAULT_MODEL_SIZE} model loaded",
+                interactive=False,
+                scale=2
+            )
+            switch_btn = gr.Button("Switch Model", variant="secondary", scale=1)
+        # Model switch handler
+        switch_btn.click(
+            fn=switch_model,
+            inputs=[model_selector],
+            outputs=[model_status],
+            queue=True
+        )
         # Queue status
         gr.HTML("""
+        <div style="background: #e7f3ff; border: 1px solid #b3d7ff; border-radius: 8px; padding: 12px 15px; margin: 15px 0;">
             <span style="font-size: 1.5em;">ℹ️</span>
             <strong>Queue System Active</strong> - Requests processed one at a time. Please wait patiently if busy.
         </div>
                             elem_classes=["primary-btn"]
                         )
+                        gr.Markdown("### Example Prompts (30)")
                         gr.Examples(
                             examples=[[text] for text in example_texts],
                             inputs=[text_input],
         # Footer
         gr.HTML(f"""
         <div class="footer">
+            <p>Built with OmniSVG | Current Model: <strong>{DEFAULT_MODEL_SIZE}</strong></p>
             <p style="color: #dc3545; font-weight: 600;">Remember: Generate 4-8 candidates and pick the best!</p>
         </div>
         """)
     args = parse_args()
+    # Determine initial model size
+    initial_model_size = args.model_size or DEFAULT_MODEL_SIZE
     print("="*60)
     print("OmniSVG Demo Page - Gradio App")
     print("="*60)
+    print(f"Available model sizes: {AVAILABLE_MODEL_SIZES}")
+    print(f"Initial model size: {initial_model_size}")
+    print(f"Device: {device}")
+    print(f"Precision: {DTYPE}")
     print("="*60)
     # Print loaded config values
+    print("\n[CONFIG] Shared settings:")
     print(f"  - TARGET_IMAGE_SIZE: {TARGET_IMAGE_SIZE}")
     print(f"  - RENDER_SIZE: {RENDER_SIZE}")
     print(f"  - BLACK_COLOR_TOKEN: {BLACK_COLOR_TOKEN}")
     print(f"  - BOS_TOKEN_ID: {BOS_TOKEN_ID}")
     print(f"  - EOS_TOKEN_ID: {EOS_TOKEN_ID}")
     print(f"  - PAD_TOKEN_ID: {PAD_TOKEN_ID}")
     print("="*60)
     print("\nLoading models (may download from HuggingFace Hub if needed)...")
+    load_models(initial_model_size, args.weight_path, args.model_path)
     print("Models loaded successfully!\n")
     demo = create_interface()