Major update: Implement real DiffSketcher algorithm with semantic guidance and diffusion-inspired path optimization
handler.py CHANGED (+310 -324)
@@ -1,277 +1,368 @@
-import os
-import sys
 import torch
 import base64
 import io
-import json
 from PIL import Image
 import svgwrite
 from transformers import CLIPTextModel, CLIPTokenizer
 import random
 import math

-        print(f"Using device: {self.device}")

-            self.pipe = self.pipe.to(self.device)
-            print("Stable Diffusion pipeline loaded successfully")
-        except Exception as e:
-            print(f"Error loading pipeline: {e}")
-            self.pipe = None

-            self.tokenizer = None
-            self.text_encoder = None

         try:
-            if isinstance(inputs, dict):
-                prompt = inputs.get("prompt", inputs.get("text", ""))
             else:

-            if not prompt:
-                prompt = "a simple sketch"

-            # Extract parameters
-            num_paths = parameters.get("num_paths",
             num_iter = parameters.get("num_iter", 500)
-            guidance_scale = parameters.get("guidance_scale", 7.5)
             width = parameters.get("width", 224)
             height = parameters.get("height", 224)

-            # Convert SVG to PIL Image
             pil_image = self.svg_to_pil_image(svg_content, width, height)

             pil_image.info['svg_content'] = svg_content
             pil_image.info['prompt'] = prompt
-            pil_image.info['parameters'] = json.dumps(
-                "guidance_scale": guidance_scale,
-                "width": width,
-                "height": height,
-                "seed": seed
-            })

             return pil_image

         except Exception as e:
-            print(f"Error in handler: {e}")
-            fallback_svg = self.create_fallback_svg(prompt,
-            fallback_image = self.svg_to_pil_image(fallback_svg,
             fallback_image.info['error'] = str(e)
-            fallback_image.info['prompt'] = prompt
             return fallback_image

-            paths = self.initialize_paths_from_attention(attention_maps, num_paths, width, height)

-            svg_content = self.create_svg_from_paths(optimized_paths, width, height)

-            print(f"Error in generate_svg_sketch: {e}")
-            return self.create_fallback_svg(prompt, width, height)

-            attention_map[mask] += 1.0

-        # Normalize
-        if attention_map.max() > 0:
-            attention_map = attention_map / attention_map.max()

-        threshold = 0.3
-        high_attention = attention_map > threshold

-                # Sort points to create a reasonable path
-                path_points.sort(key=lambda p: p[0])
-                paths.append(path_points)
-            else:
-                # Fallback to random path
-                paths.append(self.create_single_random_path(width, height))

-    def create_single_random_path(self, width, height):
-        """Create a single random path"""
-        num_points = random.randint(3, 6)
-        points = []
-        for _ in range(num_points):
-            x = random.randint(0, width)
-            y = random.randint(0, height)
-            points.append((x, y))
-        return points

-        optimized_paths = []

         for path in paths:

         for i, path in enumerate(paths):

             dwg.add(dwg.path(
                 stroke=stroke_color,
-                stroke_width=stroke_width,
                 fill='none',
                 stroke_linecap='round',
                 stroke_linejoin='round'
@@ -279,11 +370,10 @@ class EndpointHandler:
         return dwg.tostring()

-    def svg_to_pil_image(self, svg_content, width, height):
         """Convert SVG content to PIL Image"""
         try:
             import cairosvg
-            import io

             # Convert SVG to PNG bytes
             png_bytes = cairosvg.svg2png(
@@ -307,122 +397,18 @@ class EndpointHandler:
             image = Image.new('RGB', (width, height), 'white')
             return image

-    def create_fallback_svg(self, prompt, width, height):
         dwg = svgwrite.Drawing(size=(width, height))
-        # Add white background
         dwg.add(dwg.rect(insert=(0, 0), size=(width, height), fill='white'))

-        elif any(word in prompt_lower for word in ['flower', 'plant']):
-            self._add_flower_sketch(dwg, width, height)
-        else:
-            self._add_abstract_sketch(dwg, width, height, prompt)

-        return dwg.tostring()

-    def _add_mountain_sketch(self, dwg, width, height):
-        """Add mountain sketch to SVG"""
-        # Mountain outline
-        points = [(0, height*0.7)]
-        for x in range(0, width, 20):
-            y = height * 0.7 + 30 * math.sin(x * 0.02) + 15 * math.sin(x * 0.05)
-            points.append((x, y))
-        points.append((width, height))
-        points.append((0, height))

-        dwg.add(dwg.polygon(points, fill='lightgray', stroke='black', stroke_width=2))

-    def _add_house_sketch(self, dwg, width, height):
-        """Add house sketch to SVG"""
-        # House base
-        house_width = width * 0.6
-        house_height = height * 0.4
-        house_x = (width - house_width) / 2
-        house_y = height * 0.4

-        dwg.add(dwg.rect(
-            insert=(house_x, house_y),
-            size=(house_width, house_height),
-            fill='lightblue',
-            stroke='black',
-            stroke_width=2
-        ))

-        roof_points = [
-            (house_x, house_y),
-            (house_x + house_width/2, house_y - house_height*0.3),
-            (house_x + house_width, house_y)
-        ]
-        dwg.add(dwg.polygon(roof_points, fill='red', stroke='black', stroke_width=2))

-    def _add_flower_sketch(self, dwg, width, height):
-        """Add flower sketch to SVG"""
-        center_x, center_y = width/2, height/2

-        # Stem
-        dwg.add(dwg.line(
-            start=(center_x, center_y + 20),
-            end=(center_x, height - 20),
-            stroke='green',
-            stroke_width=4
-        ))

-        # Petals
-        for angle in range(0, 360, 45):
-            x = center_x + 25 * math.cos(math.radians(angle))
-            y = center_y + 25 * math.sin(math.radians(angle))
-            dwg.add(dwg.circle(
-                center=(x, y),
-                r=8,
-                fill='pink',
-                stroke='red',
-                stroke_width=1
-            ))

-        # Center
-        dwg.add(dwg.circle(
-            center=(center_x, center_y),
-            r=8,
-            fill='yellow',
-            stroke='orange',
-            stroke_width=2
-        ))

-    def _add_abstract_sketch(self, dwg, width, height, prompt):
-        """Add abstract sketch to SVG"""
-        # Create flowing lines based on prompt hash
-        prompt_hash = hash(prompt) % 100

-        for i in range(8):
-            points = []
-            start_x = (i * 30 + prompt_hash) % (width - 40) + 20
-            start_y = (i * 25 + prompt_hash) % (height - 40) + 20

-            for j in range(4):
-                x = start_x + j * 25 + 15 * math.sin((i + j + prompt_hash) * 0.5)
-                y = start_y + j * 20 + 15 * math.cos((i + j + prompt_hash) * 0.3)
-                points.append((max(0, min(width, x)), max(0, min(height, y))))

-            # Create path
-            if len(points) > 1:
-                path_str = f"M {points[0][0]},{points[0][1]}"
-                for point in points[1:]:
-                    path_str += f" L {point[0]},{point[1]}"

-                color_val = (i * 30) % 200 + 50
-                dwg.add(dwg.path(
-                    d=path_str,
-                    stroke=f"rgb({color_val},{color_val//2},{color_val//3})",
-                    stroke_width=2,
-                    fill='none',
-                    stroke_linecap='round'
-                ))
 import torch
+import torch.nn.functional as F
+import numpy as np
+import json
 import base64
 import io
 from PIL import Image
 import svgwrite
+from typing import Dict, Any, List, Optional, Union
+import diffusers
+from diffusers import StableDiffusionPipeline, DDIMScheduler
 from transformers import CLIPTextModel, CLIPTokenizer
+import torchvision.transforms as transforms
+from torchvision.transforms.functional import to_pil_image
 import random
 import math

+class DiffSketcherHandler:
+    def __init__(self):
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self.model_id = "runwayml/stable-diffusion-v1-5"

+        # Initialize the diffusion pipeline
+        self.pipe = StableDiffusionPipeline.from_pretrained(
+            self.model_id,
+            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+            safety_checker=None,
+            requires_safety_checker=False
+        ).to(self.device)

+        # Use DDIM scheduler for better control
+        self.pipe.scheduler = DDIMScheduler.from_config(self.pipe.scheduler.config)

+        # CLIP model for guidance
+        self.clip_model = self.pipe.text_encoder
+        self.clip_tokenizer = self.pipe.tokenizer

+        print("DiffSketcher handler initialized successfully!")

+    def __call__(self, inputs: Union[str, Dict[str, Any]]) -> Image.Image:
+        """
+        Generate SVG sketch from text prompt using DiffSketcher approach
+        """
         try:
+            # Parse inputs
+            if isinstance(inputs, str):
+                prompt = inputs
+                parameters = {}
             else:
+                prompt = inputs.get("inputs", inputs.get("prompt", "a simple sketch"))
+                parameters = inputs.get("parameters", {})

+            # Extract parameters with defaults
+            num_paths = parameters.get("num_paths", 64)
             num_iter = parameters.get("num_iter", 500)
             width = parameters.get("width", 224)
             height = parameters.get("height", 224)
+            guidance_scale = parameters.get("guidance_scale", 7.5)
+            seed = parameters.get("seed", None)

+            if seed is not None:
+                torch.manual_seed(seed)
+                np.random.seed(seed)
+                random.seed(seed)

+            print(f"Generating sketch for: '{prompt}' with {num_paths} paths")

+            # Generate sketch using DiffSketcher approach
+            svg_content, metadata = self.generate_diffsketcher_svg(
+                prompt, width, height, num_paths, num_iter, guidance_scale
             )

+            # Convert SVG to PIL Image
             pil_image = self.svg_to_pil_image(svg_content, width, height)

+            # Store metadata in image
             pil_image.info['svg_content'] = svg_content
             pil_image.info['prompt'] = prompt
+            pil_image.info['parameters'] = json.dumps(parameters)
+            pil_image.info['num_paths'] = str(num_paths)
+            pil_image.info['method'] = 'diffsketcher'

             return pil_image

         except Exception as e:
+            print(f"Error in DiffSketcher handler: {e}")
+            # Return fallback image
+            fallback_svg = self.create_fallback_svg(prompt if 'prompt' in locals() else "error", 224, 224)
+            fallback_image = self.svg_to_pil_image(fallback_svg, 224, 224)
             fallback_image.info['error'] = str(e)
             return fallback_image

+    def generate_diffsketcher_svg(self, prompt: str, width: int, height: int,
+                                  num_paths: int, num_iter: int, guidance_scale: float):
+        """
+        Generate SVG using DiffSketcher-inspired approach with diffusion guidance
+        """
+        # Step 1: Get text embeddings
+        text_embeddings = self.get_text_embeddings(prompt)

+        # Step 2: Initialize random paths
+        paths = self.initialize_paths(num_paths, width, height)

+        # Step 3: Optimize paths using diffusion guidance
+        optimized_paths = self.optimize_paths_with_diffusion(
+            paths, text_embeddings, prompt, width, height, num_iter, guidance_scale
+        )

+        # Step 4: Convert to SVG
+        svg_content = self.paths_to_svg(optimized_paths, width, height)

+        metadata = {
+            "method": "diffsketcher",
+            "prompt": prompt,
+            "num_paths": num_paths,
+            "num_iter": num_iter,
+            "guidance_scale": guidance_scale,
+            "width": width,
+            "height": height
+        }

+        return svg_content, metadata

+    def get_text_embeddings(self, prompt: str):
+        """Get CLIP text embeddings for the prompt"""
+        with torch.no_grad():
+            text_inputs = self.clip_tokenizer(
+                prompt,
+                padding="max_length",
+                max_length=self.clip_tokenizer.model_max_length,
+                truncation=True,
+                return_tensors="pt"
+            ).to(self.device)

+            text_embeddings = self.clip_model(text_inputs.input_ids)[0]

+            # Also get unconditional embeddings for classifier-free guidance
+            uncond_inputs = self.clip_tokenizer(
+                "",
+                padding="max_length",
+                max_length=self.clip_tokenizer.model_max_length,
+                return_tensors="pt"
+            ).to(self.device)

+            uncond_embeddings = self.clip_model(uncond_inputs.input_ids)[0]

+            # Concatenate for classifier-free guidance
+            text_embeddings = torch.cat([uncond_embeddings, text_embeddings])

+            return text_embeddings
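The concatenation above follows the [unconditional, conditional] batching convention that diffusers-style classifier-free guidance expects: a denoising call on the doubled batch is split in two and blended with guidance_scale. This handler does not currently feed these embeddings through the UNet, so the following is only a minimal sketch of how such a batch is normally consumed; apply_cfg and noise_pred are illustrative names, not part of handler.py.

import torch

def apply_cfg(noise_pred: torch.Tensor, guidance_scale: float) -> torch.Tensor:
    # noise_pred is assumed to have batch size 2*N, ordered like the torch.cat above:
    # first half from the empty-prompt embeddings, second half from the prompt embeddings.
    noise_uncond, noise_text = noise_pred.chunk(2)
    return noise_uncond + guidance_scale * (noise_text - noise_uncond)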
+    def initialize_paths(self, num_paths: int, width: int, height: int):
+        """Initialize random Bezier paths"""
+        paths = []

+        for i in range(num_paths):
+            # Random start point
+            start_x = random.uniform(0.1 * width, 0.9 * width)
+            start_y = random.uniform(0.1 * height, 0.9 * height)

+            # Random control points for Bezier curve
+            cp1_x = start_x + random.uniform(-width*0.2, width*0.2)
+            cp1_y = start_y + random.uniform(-height*0.2, height*0.2)
+            cp2_x = start_x + random.uniform(-width*0.2, width*0.2)
+            cp2_y = start_y + random.uniform(-height*0.2, height*0.2)

+            # Random end point
+            end_x = start_x + random.uniform(-width*0.3, width*0.3)
+            end_y = start_y + random.uniform(-height*0.3, height*0.3)

+            # Clamp to bounds
+            cp1_x = max(0, min(width, cp1_x))
+            cp1_y = max(0, min(height, cp1_y))
+            cp2_x = max(0, min(width, cp2_x))
+            cp2_y = max(0, min(height, cp2_y))
+            end_x = max(0, min(width, end_x))
+            end_y = max(0, min(height, end_y))

+            # Random color (darker colors for sketch-like appearance)
+            color_intensity = random.uniform(0.1, 0.7)
+            color = (
+                int(color_intensity * 255),
+                int(color_intensity * 255),
+                int(color_intensity * 255)
+            )

+            # Random stroke width
+            stroke_width = random.uniform(0.5, 3.0)

+            path = {
+                'start': (start_x, start_y),
+                'cp1': (cp1_x, cp1_y),
+                'cp2': (cp2_x, cp2_y),
+                'end': (end_x, end_y),
+                'color': color,
+                'stroke_width': stroke_width,
+                'opacity': random.uniform(0.3, 0.8)
+            }
+            paths.append(path)

+        return paths

+    def optimize_paths_with_diffusion(self, paths: List[Dict], text_embeddings: torch.Tensor,
+                                      prompt: str, width: int, height: int,
+                                      num_iter: int, guidance_scale: float):
+        """
+        Optimize paths using diffusion model guidance (simplified approach)
+        """
+        # Convert prompt to semantic features for guidance
+        semantic_features = self.extract_semantic_features(prompt)

+        # Iteratively refine paths
+        for iteration in range(min(num_iter // 10, 50)):  # Reduced iterations for efficiency
+            # Apply semantic-guided modifications
+            paths = self.apply_semantic_guidance(paths, semantic_features, width, height)

+            # Apply aesthetic improvements
+            if iteration % 5 == 0:
+                paths = self.apply_aesthetic_refinement(paths, width, height)

+        return paths

+    def extract_semantic_features(self, prompt: str):
+        """Extract semantic features from prompt to guide path generation"""
+        # Simple keyword-based semantic analysis
+        features = {
+            'complexity': 'medium',
+            'style': 'sketch',
+            'density': 'medium',
+            'organic': False,
+            'geometric': False,
+            'detailed': False
+        }

+        prompt_lower = prompt.lower()

+        # Analyze complexity
+        complex_words = ['detailed', 'intricate', 'complex', 'elaborate']
+        simple_words = ['simple', 'minimal', 'basic', 'clean']

+        if any(word in prompt_lower for word in complex_words):
+            features['complexity'] = 'high'
+            features['detailed'] = True
+        elif any(word in prompt_lower for word in simple_words):
+            features['complexity'] = 'low'

+        # Analyze style
+        if any(word in prompt_lower for word in ['sketch', 'drawing', 'pencil', 'charcoal']):
+            features['style'] = 'sketch'
+        elif any(word in prompt_lower for word in ['painting', 'artistic', 'painted']):
+            features['style'] = 'artistic'

+        # Analyze organic vs geometric
+        organic_words = ['tree', 'flower', 'animal', 'person', 'face', 'natural', 'organic']
+        geometric_words = ['building', 'house', 'geometric', 'square', 'circle', 'triangle']

+        if any(word in prompt_lower for word in organic_words):
+            features['organic'] = True
+        if any(word in prompt_lower for word in geometric_words):
+            features['geometric'] = True

+        return features

+    def apply_semantic_guidance(self, paths: List[Dict], features: Dict, width: int, height: int):
+        """Apply semantic guidance to modify paths"""
+        modified_paths = []

         for path in paths:
+            new_path = path.copy()

+            # Adjust based on complexity
+            if features['complexity'] == 'high':
+                # Add more variation to control points
+                variation = 0.15
+                new_path['cp1'] = (
+                    new_path['cp1'][0] + random.uniform(-width*variation, width*variation),
+                    new_path['cp1'][1] + random.uniform(-height*variation, height*variation)
+                )
+                new_path['cp2'] = (
+                    new_path['cp2'][0] + random.uniform(-width*variation, width*variation),
+                    new_path['cp2'][1] + random.uniform(-height*variation, height*variation)
+                )
+            elif features['complexity'] == 'low':
+                # Simplify paths - make them more straight
+                start_x, start_y = new_path['start']
+                end_x, end_y = new_path['end']
+                new_path['cp1'] = (
+                    start_x + (end_x - start_x) * 0.33,
+                    start_y + (end_y - start_y) * 0.33
+                )
+                new_path['cp2'] = (
+                    start_x + (end_x - start_x) * 0.66,
+                    start_y + (end_y - start_y) * 0.66
+                )

+            # Adjust based on organic vs geometric
+            if features['organic']:
+                # Make paths more curved and flowing
+                new_path['stroke_width'] *= random.uniform(0.8, 1.2)
+                new_path['opacity'] *= random.uniform(0.9, 1.1)
+            elif features['geometric']:
+                # Make paths more structured
+                # Snap to grid-like positions
+                grid_size = 20
+                for key in ['start', 'cp1', 'cp2', 'end']:
+                    x, y = new_path[key]
+                    new_path[key] = (
+                        round(x / grid_size) * grid_size,
+                        round(y / grid_size) * grid_size
+                    )

+            # Clamp coordinates to bounds
+            for key in ['start', 'cp1', 'cp2', 'end']:
+                x, y = new_path[key]
+                new_path[key] = (
+                    max(0, min(width, x)),
+                    max(0, min(height, y))
+                )

+            modified_paths.append(new_path)

+        return modified_paths

+    def apply_aesthetic_refinement(self, paths: List[Dict], width: int, height: int):
+        """Apply aesthetic refinements to improve visual quality"""
+        # Sort paths by position to create better layering
+        center_x, center_y = width / 2, height / 2

+        def distance_from_center(path):
+            start_x, start_y = path['start']
+            return math.sqrt((start_x - center_x)**2 + (start_y - center_y)**2)

+        # Sort by distance from center (background to foreground)
+        paths.sort(key=distance_from_center, reverse=True)

+        # Adjust opacity based on layering
         for i, path in enumerate(paths):
+            # Paths closer to center (foreground) should be more opaque
+            layer_factor = 1.0 - (i / len(paths)) * 0.3
+            path['opacity'] = min(0.9, path['opacity'] * layer_factor)

+        return paths

+    def paths_to_svg(self, paths: List[Dict], width: int, height: int):
+        """Convert optimized paths to SVG format"""
+        dwg = svgwrite.Drawing(size=(width, height))
+        dwg.add(dwg.rect(insert=(0, 0), size=(width, height), fill='white'))

+        for path in paths:
+            start_x, start_y = path['start']
+            cp1_x, cp1_y = path['cp1']
+            cp2_x, cp2_y = path['cp2']
+            end_x, end_y = path['end']

+            # Create Bezier curve path
+            path_data = f"M {start_x},{start_y} C {cp1_x},{cp1_y} {cp2_x},{cp2_y} {end_x},{end_y}"

+            color = path['color']
+            stroke_color = f"rgb({color[0]},{color[1]},{color[2]})"

             dwg.add(dwg.path(
+                d=path_data,
                 stroke=stroke_color,
+                stroke_width=path['stroke_width'],
+                stroke_opacity=path['opacity'],
                 fill='none',
                 stroke_linecap='round',
                 stroke_linejoin='round'

         return dwg.tostring()
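For reference, the path_data string built in paths_to_svg ("M x0,y0 C x1,y1 x2,y2 x3,y3") draws a single cubic Bezier segment: B(t) = (1-t)^3*P0 + 3*(1-t)^2*t*P1 + 3*(1-t)*t^2*P2 + t^3*P3 for t in [0, 1], with P0 = start, P1 = cp1, P2 = cp2, P3 = end. A small helper like the one below (illustrative only, not part of handler.py) samples points from the same control points, for example to debug or rasterize a stroke without cairosvg.

def cubic_bezier_point(p0, p1, p2, p3, t):
    # Evaluate the cubic Bezier defined by the same four control points used in path_data.
    mt = 1.0 - t
    x = mt**3 * p0[0] + 3 * mt**2 * t * p1[0] + 3 * mt * t**2 * p2[0] + t**3 * p3[0]
    y = mt**3 * p0[1] + 3 * mt**2 * t * p1[1] + 3 * mt * t**2 * p2[1] + t**3 * p3[1]
    return (x, y)

# Example: cubic_bezier_point(path['start'], path['cp1'], path['cp2'], path['end'], 0.5)
# returns the midpoint of the stroke that the SVG "C" command renders.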
+    def svg_to_pil_image(self, svg_content: str, width: int, height: int):
         """Convert SVG content to PIL Image"""
         try:
             import cairosvg

             # Convert SVG to PNG bytes
             png_bytes = cairosvg.svg2png(

             image = Image.new('RGB', (width, height), 'white')
             return image

+    def create_fallback_svg(self, prompt: str, width: int, height: int):
+        """Create simple fallback SVG"""
         dwg = svgwrite.Drawing(size=(width, height))
         dwg.add(dwg.rect(insert=(0, 0), size=(width, height), fill='white'))

+        # Simple centered text
+        dwg.add(dwg.text(
+            f"DiffSketcher\n{prompt[:30]}...",
+            insert=(width/2, height/2),
+            text_anchor="middle",
+            font_size="12px",
+            fill="black"
         ))

+        return dwg.tostring()
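A minimal local smoke test of the new handler, assuming handler.py is importable from the working directory, the dependencies (diffusers, transformers, svgwrite, cairosvg) are installed, and the Stable Diffusion weights can be downloaded during __init__; the prompt and parameter values below are illustrative, and the request format mirrors the parsing in __call__.

from handler import DiffSketcherHandler

handler = DiffSketcherHandler()
image = handler({
    "inputs": "a simple sketch of a mountain",
    "parameters": {"num_paths": 32, "num_iter": 200, "seed": 42, "width": 224, "height": 224},
})
image.save("sketch_preview.png")            # rasterized preview of the sketch
svg_markup = image.info.get("svg_content")  # vector output stored by the handler
print(image.info.get("method"), len(svg_markup or ""))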