Spaces:

OmniSVG
/

OmniSVG-3B

Running on Zero

App Files Files Community

OmniSVG committed 21 days ago

Commit

ab5aebd

verified ·

1 Parent(s): 84487e6

Update app.py

Browse files

Files changed (1) hide show

app.py +133 -47

app.py CHANGED Viewed

@@ -37,41 +37,89 @@ svg_tokenizer = None
 # Thread lock for model inference
 generation_lock = threading.Lock()
-# Constants
 SYSTEM_PROMPT = """You are an expert SVG code generator.
 Generate precise, valid SVG path commands that accurately represent the described scene or object.
 Focus on capturing key shapes, spatial relationships, and visual composition."""
 SUPPORTED_FORMATS = ['.png', '.jpg', '.jpeg', '.webp', '.bmp', '.gif']
-TARGET_IMAGE_SIZE = 448
-BLACK_COLOR_TOKEN = 40012
 # Default Hugging Face model IDs
-DEFAULT_QWEN_MODEL = "Qwen/Qwen2.5-VL-7B-Instruct"
-DEFAULT_OMNISVG_MODEL = "OmniSVG/OmniSVG1.1_8B"
-# Task configurations with defaults
 TASK_CONFIGS = {
-    "text-to-svg-icon": {
         "default_temperature": 0.5,
         "default_top_p": 0.88,
         "default_top_k": 50,
         "default_repetition_penalty": 1.05,
-    },
-    "text-to-svg-illustration": {
         "default_temperature": 0.6,
         "default_top_p": 0.90,
         "default_top_k": 60,
         "default_repetition_penalty": 1.03,
-    },
-    "image-to-svg": {
         "default_temperature": 0.3,
         "default_top_p": 0.90,
         "default_top_k": 50,
         "default_repetition_penalty": 1.05,
-    }
 }
 # Custom CSS
 CUSTOM_CSS = """
 /* Main container centering */
@@ -556,8 +604,8 @@ def load_models(weight_path: str, model_path: str):
     # Initialize sketch decoder
     print("\n[2/3] Initializing SketchDecoder...")
     sketch_decoder = SketchDecoder(
-        pix_len=config['model']['max_length'],
-        text_len=200,
         model_path=model_path,
         torch_dtype=DTYPE
     )
@@ -625,18 +673,24 @@ def detect_text_subtype(text_prompt):
     return "icon"
-def detect_and_replace_background(image, threshold=240, edge_sample_ratio=0.1):
     """
     Detect if image has non-white background and optionally replace it.
     Args:
         image: PIL Image (RGB or RGBA)
-        threshold: Pixel values above this are considered "white"
-        edge_sample_ratio: Ratio of edge pixels to sample
     Returns:
         tuple: (processed_image, background_was_replaced)
     """
     img_array = np.array(image)
     # If already has alpha channel, composite onto white
@@ -651,7 +705,7 @@ def detect_and_replace_background(image, threshold=240, edge_sample_ratio=0.1):
     edge_pixels = []
     # Sample from all 4 edges
-    sample_count = max(10, int(min(h, w) * edge_sample_ratio))
     # Top and bottom edges
     for i in range(0, w, max(1, w // sample_count)):
@@ -697,7 +751,7 @@ def detect_and_replace_background(image, threshold=240, edge_sample_ratio=0.1):
         # Create mask for background (colors similar to detected bg_color)
         color_diff = np.sqrt(np.sum((img_array[:, :, :3].astype(float) - np.array(bg_color)) ** 2, axis=2))
-        bg_mask = color_diff < 30  # Threshold for color similarity
         # Replace background with white
         result = img_array.copy()
@@ -711,18 +765,22 @@ def detect_and_replace_background(image, threshold=240, edge_sample_ratio=0.1):
     return image, False
-def preprocess_image_for_svg(image, replace_background=True, target_size=448):
     """
     Preprocess image for SVG generation.
     Args:
         image: Input PIL Image or path
         replace_background: Whether to replace non-white backgrounds
-        target_size: Target size for resizing
     Returns:
         tuple: (processed_pil_image, was_modified)
     """
     # Load image if path
     if isinstance(image, str):
         raw_img = Image.open(image)
@@ -792,8 +850,12 @@ Requirements:
     return inputs
-def render_svg_to_image(svg_str, size=512):
     """Render SVG to high-quality PIL Image"""
     try:
         png_data = cairosvg.svg2png(
             bytestring=svg_str.encode('utf-8'),
@@ -858,7 +920,7 @@ def create_gallery_html(candidates, cols=4):
 def is_valid_candidate(svg_str, img, subtype="illustration"):
     """Check candidate validity"""
-    if not svg_str or len(svg_str) < 20:
         return False, "too_short"
     if '<svg' not in svg_str:
@@ -870,7 +932,7 @@ def is_valid_candidate(svg_str, img, subtype="illustration"):
     img_array = np.array(img)
     mean_val = img_array.mean()
-    threshold = 250 if subtype == "illustration" else 252
     if mean_val > threshold:
         return False, "empty_image"
@@ -907,12 +969,12 @@ def generate_candidates(inputs, task_type, subtype, temperature, top_p, top_k, r
         'repetition_penalty': repetition_penalty,
         'early_stopping': True,
         'no_repeat_ngram_size': 0,
-        'eos_token_id': config['model']['eos_token_id'],
-        'pad_token_id': config['model']['pad_token_id'],
-        'bos_token_id': config['model']['bos_token_id'],
     }
-    actual_samples = num_samples + 4
     try:
         if progress_callback:
@@ -942,9 +1004,9 @@ def generate_candidates(inputs, task_type, subtype, temperature, top_p, top_k, r
                 current_ids = generated_ids_batch[i:i+1]
                 fake_wrapper = torch.cat([
-                    torch.full((1, 1), config['model']['bos_token_id'], device=device),
                     current_ids,
-                    torch.full((1, 1), config['model']['eos_token_id'], device=device)
                 ], dim=1)
                 generated_xy = svg_tokenizer.process_generated_tokens(fake_wrapper)
@@ -965,7 +1027,7 @@ def generate_candidates(inputs, task_type, subtype, temperature, top_p, top_k, r
                 if 'width=' not in svg_str:
                     svg_str = svg_str.replace('<svg', f'<svg width="{TARGET_IMAGE_SIZE}" height="{TARGET_IMAGE_SIZE}"', 1)
-                png_image = render_svg_to_image(svg_str, size=512)
                 is_valid, reason = is_valid_candidate(svg_str, png_image, subtype)
                 if is_valid:
@@ -1024,7 +1086,6 @@ def gradio_text_to_svg(text_description, num_candidates, temperature, top_p, top
     progress(0.05, f"Detected: {subtype}")
     inputs = prepare_inputs("text-to-svg", text_description.strip())
-    max_length = config['model']['max_length']
     def update_progress(val, msg):
         progress(val, msg)
@@ -1032,7 +1093,7 @@ def gradio_text_to_svg(text_description, num_candidates, temperature, top_p, top
     all_candidates = generate_candidates(
         inputs, "text-to-svg", subtype,
         temperature, top_p, int(top_k), repetition_penalty,
-        max_length, int(num_candidates),
         progress_callback=update_progress
     )
@@ -1105,7 +1166,6 @@ def gradio_image_to_svg(image, num_candidates, temperature, top_p, top_k, repeti
     try:
         progress(0.1, "Preparing model inputs...")
         inputs = prepare_inputs("image-to-svg", tmp_path)
-        max_length = config['model']['max_length']
         def update_progress(val, msg):
             progress(val, msg)
@@ -1113,7 +1173,7 @@ def gradio_image_to_svg(image, num_candidates, temperature, top_p, top_k, repeti
         all_candidates = generate_candidates(
             inputs, "image-to-svg", "image",
             temperature, top_p, int(top_k), repetition_penalty,
-            max_length, int(num_candidates),
             progress_callback=update_progress
         )
@@ -1249,7 +1309,7 @@ def create_interface():
                         with gr.Group(elem_classes=["settings-group"]):
                             gr.Markdown("### Settings")
                             img_num_candidates = gr.Slider(
-                                minimum=1, maximum=8, value=4, step=1,
                                 label="Number of Candidates"
                             )
                             img_replace_bg = gr.Checkbox(
@@ -1260,20 +1320,28 @@ def create_interface():
                             with gr.Accordion("Advanced Parameters", open=False):
                                 img_temperature = gr.Slider(
-                                    minimum=0.1, maximum=1.0, value=0.3, step=0.05,
                                     label="Temperature (Lower=accurate)",
                                     info="0.2-0.4 recommended"
                                 )
                                 img_top_p = gr.Slider(
-                                    minimum=0.5, maximum=1.0, value=0.90, step=0.02,
                                     label="Top-P"
                                 )
                                 img_top_k = gr.Slider(
-                                    minimum=10, maximum=100, value=50, step=5,
                                     label="Top-K"
                                 )
                                 img_rep_penalty = gr.Slider(
-                                    minimum=1.0, maximum=1.3, value=1.05, step=0.01,
                                     label="Repetition Penalty"
                                 )
@@ -1327,27 +1395,35 @@ def create_interface():
                         with gr.Group(elem_classes=["settings-group"]):
                             gr.Markdown("### Settings")
                             text_num_candidates = gr.Slider(
-                                minimum=1, maximum=8, value=6, step=1,
                                 label="Number of Candidates",
                                 info="More = better chances!"
                             )
                             with gr.Accordion("Advanced Parameters", open=False):
                                 text_temperature = gr.Slider(
-                                    minimum=0.1, maximum=1.0, value=0.5, step=0.05,
                                     label="Temperature",
                                     info="Icons: 0.3-0.5 | Complex: 0.5-0.7"
                                 )
                                 text_top_p = gr.Slider(
-                                    minimum=0.5, maximum=1.0, value=0.90, step=0.02,
                                     label="Top-P"
                                 )
                                 text_top_k = gr.Slider(
-                                    minimum=10, maximum=100, value=60, step=5,
                                     label="Top-K"
                                 )
                                 text_rep_penalty = gr.Slider(
-                                    minimum=1.0, maximum=1.3, value=1.03, step=0.01,
                                     label="Repetition Penalty",
                                     info="Increase if you see repetitive patterns"
                                 )
@@ -1413,6 +1489,17 @@ if __name__ == "__main__":
     print(f"Precision: {DTYPE}")
     print("="*60)
     print("\nLoading models (may download from HuggingFace Hub if needed)...")
     load_models(args.weight_path, args.model_path)
     print("Models loaded successfully!\n")
@@ -1426,5 +1513,4 @@ if __name__ == "__main__":
         server_port=args.port,
         share=args.share,
         debug=args.debug,
-    )

 # Thread lock for model inference
 generation_lock = threading.Lock()
+# Constants from config
 SYSTEM_PROMPT = """You are an expert SVG code generator.
 Generate precise, valid SVG path commands that accurately represent the described scene or object.
 Focus on capturing key shapes, spatial relationships, and visual composition."""
 SUPPORTED_FORMATS = ['.png', '.jpg', '.jpeg', '.webp', '.bmp', '.gif']
+# ============================================================
+# Image processing settings from config
+# ============================================================
+image_config = config.get('image', {})
+TARGET_IMAGE_SIZE = image_config.get('target_size', 448)
+RENDER_SIZE = image_config.get('render_size', 512)
+BACKGROUND_THRESHOLD = image_config.get('background_threshold', 240)
+EMPTY_THRESHOLD_ILLUSTRATION = image_config.get('empty_threshold_illustration', 250)
+EMPTY_THRESHOLD_ICON = image_config.get('empty_threshold_icon', 252)
+EDGE_SAMPLE_RATIO = image_config.get('edge_sample_ratio', 0.1)
+COLOR_SIMILARITY_THRESHOLD = image_config.get('color_similarity_threshold', 30)
+MIN_EDGE_SAMPLES = image_config.get('min_edge_samples', 10)
+# ============================================================
+# Color settings from config
+# ============================================================
+colors_config = config.get('colors', {})
+BLACK_COLOR_TOKEN = colors_config.get('black_color_token',
+                                       colors_config.get('color_token_start', 40010) + 2)
+# ============================================================
+# Model settings from config
+# ============================================================
+model_config = config.get('model', {})
+BOS_TOKEN_ID = model_config.get('bos_token_id', 196998)
+EOS_TOKEN_ID = model_config.get('eos_token_id', 196999)
+PAD_TOKEN_ID = model_config.get('pad_token_id', 151643)
+MAX_LENGTH = model_config.get('max_length', 3537)
+# ============================================================
 # Default Hugging Face model IDs
+# ============================================================
+hf_config = config.get('huggingface', {})
+DEFAULT_QWEN_MODEL = hf_config.get('qwen_model', "Qwen/Qwen2.5-VL-7B-Instruct")
+DEFAULT_OMNISVG_MODEL = hf_config.get('omnisvg_model', "OmniSVG/OmniSVG1.1_8B")
+# ============================================================
+# Task configurations with defaults from config
+# ============================================================
+task_config = config.get('task_configs', {})
 TASK_CONFIGS = {
+    "text-to-svg-icon": task_config.get('text_to_svg_icon', {
         "default_temperature": 0.5,
         "default_top_p": 0.88,
         "default_top_k": 50,
         "default_repetition_penalty": 1.05,
+    }),
+    "text-to-svg-illustration": task_config.get('text_to_svg_illustration', {
         "default_temperature": 0.6,
         "default_top_p": 0.90,
         "default_top_k": 60,
         "default_repetition_penalty": 1.03,
+    }),
+    "image-to-svg": task_config.get('image_to_svg', {
         "default_temperature": 0.3,
         "default_top_p": 0.90,
         "default_top_k": 50,
         "default_repetition_penalty": 1.05,
+    })
 }
+# ============================================================
+# Generation parameters from config
+# ============================================================
+gen_config = config.get('generation', {})
+DEFAULT_NUM_CANDIDATES = gen_config.get('default_num_candidates', 4)
+MAX_NUM_CANDIDATES = gen_config.get('max_num_candidates', 8)
+EXTRA_CANDIDATES_BUFFER = gen_config.get('extra_candidates_buffer', 4)
+# ============================================================
+# Validation settings from config
+# ============================================================
+validation_config = config.get('validation', {})
+MIN_SVG_LENGTH = validation_config.get('min_svg_length', 20)
 # Custom CSS
 CUSTOM_CSS = """
 /* Main container centering */
     # Initialize sketch decoder
     print("\n[2/3] Initializing SketchDecoder...")
     sketch_decoder = SketchDecoder(
+        pix_len=MAX_LENGTH,
+        text_len=config.get('text', {}).get('max_length', 200),
         model_path=model_path,
         torch_dtype=DTYPE
     )
     return "icon"
+def detect_and_replace_background(image, threshold=None, edge_sample_ratio=None):
     """
     Detect if image has non-white background and optionally replace it.
     Args:
         image: PIL Image (RGB or RGBA)
+        threshold: Pixel values above this are considered "white" (from config)
+        edge_sample_ratio: Ratio of edge pixels to sample (from config)
     Returns:
         tuple: (processed_image, background_was_replaced)
     """
+    # Use config values if not provided
+    if threshold is None:
+        threshold = BACKGROUND_THRESHOLD
+    if edge_sample_ratio is None:
+        edge_sample_ratio = EDGE_SAMPLE_RATIO
     img_array = np.array(image)
     # If already has alpha channel, composite onto white
     edge_pixels = []
     # Sample from all 4 edges
+    sample_count = max(MIN_EDGE_SAMPLES, int(min(h, w) * edge_sample_ratio))
     # Top and bottom edges
     for i in range(0, w, max(1, w // sample_count)):
         # Create mask for background (colors similar to detected bg_color)
         color_diff = np.sqrt(np.sum((img_array[:, :, :3].astype(float) - np.array(bg_color)) ** 2, axis=2))
+        bg_mask = color_diff < COLOR_SIMILARITY_THRESHOLD
         # Replace background with white
         result = img_array.copy()
     return image, False
+def preprocess_image_for_svg(image, replace_background=True, target_size=None):
     """
     Preprocess image for SVG generation.
     Args:
         image: Input PIL Image or path
         replace_background: Whether to replace non-white backgrounds
+        target_size: Target size for resizing (from config)
     Returns:
         tuple: (processed_pil_image, was_modified)
     """
+    # Use config value if not provided
+    if target_size is None:
+        target_size = TARGET_IMAGE_SIZE
     # Load image if path
     if isinstance(image, str):
         raw_img = Image.open(image)
     return inputs
+def render_svg_to_image(svg_str, size=None):
     """Render SVG to high-quality PIL Image"""
+    # Use config value if not provided
+    if size is None:
+        size = RENDER_SIZE
     try:
         png_data = cairosvg.svg2png(
             bytestring=svg_str.encode('utf-8'),
 def is_valid_candidate(svg_str, img, subtype="illustration"):
     """Check candidate validity"""
+    if not svg_str or len(svg_str) < MIN_SVG_LENGTH:
         return False, "too_short"
     if '<svg' not in svg_str:
     img_array = np.array(img)
     mean_val = img_array.mean()
+    threshold = EMPTY_THRESHOLD_ILLUSTRATION if subtype == "illustration" else EMPTY_THRESHOLD_ICON
     if mean_val > threshold:
         return False, "empty_image"
         'repetition_penalty': repetition_penalty,
         'early_stopping': True,
         'no_repeat_ngram_size': 0,
+        'eos_token_id': EOS_TOKEN_ID,
+        'pad_token_id': PAD_TOKEN_ID,
+        'bos_token_id': BOS_TOKEN_ID,
     }
+    actual_samples = num_samples + EXTRA_CANDIDATES_BUFFER
     try:
         if progress_callback:
                 current_ids = generated_ids_batch[i:i+1]
                 fake_wrapper = torch.cat([
+                    torch.full((1, 1), BOS_TOKEN_ID, device=device),
                     current_ids,
+                    torch.full((1, 1), EOS_TOKEN_ID, device=device)
                 ], dim=1)
                 generated_xy = svg_tokenizer.process_generated_tokens(fake_wrapper)
                 if 'width=' not in svg_str:
                     svg_str = svg_str.replace('<svg', f'<svg width="{TARGET_IMAGE_SIZE}" height="{TARGET_IMAGE_SIZE}"', 1)
+                png_image = render_svg_to_image(svg_str, size=RENDER_SIZE)
                 is_valid, reason = is_valid_candidate(svg_str, png_image, subtype)
                 if is_valid:
     progress(0.05, f"Detected: {subtype}")
     inputs = prepare_inputs("text-to-svg", text_description.strip())
     def update_progress(val, msg):
         progress(val, msg)
     all_candidates = generate_candidates(
         inputs, "text-to-svg", subtype,
         temperature, top_p, int(top_k), repetition_penalty,
+        MAX_LENGTH, int(num_candidates),
         progress_callback=update_progress
     )
     try:
         progress(0.1, "Preparing model inputs...")
         inputs = prepare_inputs("image-to-svg", tmp_path)
         def update_progress(val, msg):
             progress(val, msg)
         all_candidates = generate_candidates(
             inputs, "image-to-svg", "image",
             temperature, top_p, int(top_k), repetition_penalty,
+            MAX_LENGTH, int(num_candidates),
             progress_callback=update_progress
         )
                         with gr.Group(elem_classes=["settings-group"]):
                             gr.Markdown("### Settings")
                             img_num_candidates = gr.Slider(
+                                minimum=1, maximum=MAX_NUM_CANDIDATES, value=DEFAULT_NUM_CANDIDATES, step=1,
                                 label="Number of Candidates"
                             )
                             img_replace_bg = gr.Checkbox(
                             with gr.Accordion("Advanced Parameters", open=False):
                                 img_temperature = gr.Slider(
+                                    minimum=0.1, maximum=1.0,
+                                    value=TASK_CONFIGS["image-to-svg"].get("default_temperature", 0.3),
+                                    step=0.05,
                                     label="Temperature (Lower=accurate)",
                                     info="0.2-0.4 recommended"
                                 )
                                 img_top_p = gr.Slider(
+                                    minimum=0.5, maximum=1.0,
+                                    value=TASK_CONFIGS["image-to-svg"].get("default_top_p", 0.90),
+                                    step=0.02,
                                     label="Top-P"
                                 )
                                 img_top_k = gr.Slider(
+                                    minimum=10, maximum=100,
+                                    value=TASK_CONFIGS["image-to-svg"].get("default_top_k", 50),
+                                    step=5,
                                     label="Top-K"
                                 )
                                 img_rep_penalty = gr.Slider(
+                                    minimum=1.0, maximum=1.3,
+                                    value=TASK_CONFIGS["image-to-svg"].get("default_repetition_penalty", 1.05),
+                                    step=0.01,
                                     label="Repetition Penalty"
                                 )
                         with gr.Group(elem_classes=["settings-group"]):
                             gr.Markdown("### Settings")
                             text_num_candidates = gr.Slider(
+                                minimum=1, maximum=MAX_NUM_CANDIDATES, value=6, step=1,
                                 label="Number of Candidates",
                                 info="More = better chances!"
                             )
                             with gr.Accordion("Advanced Parameters", open=False):
                                 text_temperature = gr.Slider(
+                                    minimum=0.1, maximum=1.0,
+                                    value=TASK_CONFIGS["text-to-svg-icon"].get("default_temperature", 0.5),
+                                    step=0.05,
                                     label="Temperature",
                                     info="Icons: 0.3-0.5 | Complex: 0.5-0.7"
                                 )
                                 text_top_p = gr.Slider(
+                                    minimum=0.5, maximum=1.0,
+                                    value=TASK_CONFIGS["text-to-svg-icon"].get("default_top_p", 0.90),
+                                    step=0.02,
                                     label="Top-P"
                                 )
                                 text_top_k = gr.Slider(
+                                    minimum=10, maximum=100,
+                                    value=TASK_CONFIGS["text-to-svg-icon"].get("default_top_k", 60),
+                                    step=5,
                                     label="Top-K"
                                 )
                                 text_rep_penalty = gr.Slider(
+                                    minimum=1.0, maximum=1.3,
+                                    value=TASK_CONFIGS["text-to-svg-icon"].get("default_repetition_penalty", 1.03),
+                                    step=0.01,
                                     label="Repetition Penalty",
                                     info="Increase if you see repetitive patterns"
                                 )
     print(f"Precision: {DTYPE}")
     print("="*60)
+    # Print loaded config values
+    print("\n[CONFIG] Loaded settings:")
+    print(f"  - TARGET_IMAGE_SIZE: {TARGET_IMAGE_SIZE}")
+    print(f"  - RENDER_SIZE: {RENDER_SIZE}")
+    print(f"  - BLACK_COLOR_TOKEN: {BLACK_COLOR_TOKEN}")
+    print(f"  - MAX_LENGTH: {MAX_LENGTH}")
+    print(f"  - BOS_TOKEN_ID: {BOS_TOKEN_ID}")
+    print(f"  - EOS_TOKEN_ID: {EOS_TOKEN_ID}")
+    print(f"  - PAD_TOKEN_ID: {PAD_TOKEN_ID}")
+    print("="*60)
     print("\nLoading models (may download from HuggingFace Hub if needed)...")
     load_models(args.weight_path, args.model_path)
     print("Models loaded successfully!\n")
         server_port=args.port,
         share=args.share,
         debug=args.debug,
+    )