Spaces:

mrpink925
/

StencilAI_Demo

Sleeping

App Files Community

github-actions[bot] committed 11 days ago

Commit

18a9cc5

1 Parent(s): df4d2da

Update code from GitHub Actions - 2025-12-02 18:06:18

Browse files

Files changed (3) hide show

Stencil.py +114 -32
app.py +50 -10
requirements.txt +3 -0

Stencil.py CHANGED Viewed

@@ -6,7 +6,14 @@ using pretrained Stable Diffusion models with prompt engineering.
 """
 import torch
-from diffusers import StableDiffusionPipeline, DPMSolverMultistepScheduler
 from PIL import Image, ImageOps, ImageEnhance, ImageFilter
 from typing import Optional, List, Union
 import os
@@ -43,7 +50,9 @@ class StencilGenerator:
     def __init__(
         self,
-        model_id: str = "stabilityai/stable-diffusion-2-1-base",
         device: Optional[str] = None,
         use_fp16: bool = True
     ):
@@ -51,18 +60,52 @@ class StencilGenerator:
         Initialize the Stencil Generator.
         Args:
-            model_id: HuggingFace model ID for Stable Diffusion model
             device: Device to run on ('cuda', 'cpu', or None for auto-detect)
             use_fp16: Whether to use half precision (FP16) for faster inference
         """
         self.model_id = model_id
         self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
         self.use_fp16 = use_fp16 and self.device == "cuda"
         # Apply monkey-patch to fix transformers version compatibility
         _patch_clip_init()
-        print(f"Loading model {model_id} on {self.device}...")
         # Load the pipeline with version-compatible parameters
         dtype = torch.float16 if self.use_fp16 else torch.float32
@@ -86,34 +129,67 @@ class StencilGenerator:
             # Uncomment if you have limited VRAM
             # self.pipe.enable_vae_slicing()
-        print("Model loaded successfully!")
-        # Default stencil prompt suffix - simplified since post-processing does the heavy lifting
-        self.stencil_suffix = (
-            "black silhouette, high contrast, simple stencil design, "
-            "centered in frame, complete object visible, isolated subject"
-        )
-        # Default negative prompt to avoid unwanted features
-        # self.default_negative_prompt = (
-        #     "color, colorful, photograph, realistic, detailed, complex, "
-        #     "blurry, low quality, watermark, text, cropped, cut off, "
-        #     "partial, multiple subjects, duplicate"
-        # )
-        # Simpler stencil prompt suffix (seems to work better) - simplified since post-processing does the heavy lifting
-        # self.stencil_suffix = (
-        #     "black silhouette, high contrast, sketch line drawing, simple, simple stencil design, white background, "
-        #     # "centered in frame, complete object visible, isolated subject"
-        # )
-        # Simpler negative prompt (seems to work better) to avoid unwanted features
-        self.default_negative_prompt = (
-            "color, colorful, photograph, realistic, detailed, complex, "
-            # "blurry, low quality, watermark, text, cropped, cut off, "
-            # "partial, multiple subjects, duplicate"
         )
     def _clean_stencil_image(
         self,
         image: Image.Image,
@@ -234,12 +310,18 @@ class StencilGenerator:
         """
-        # Construct full prompt
         full_prompt = prompt
-        if add_stencil_suffix:
-            full_prompt = f"{prompt}, {self.stencil_suffix}"
-        # Use default negative prompt if none provided
         full_negative_prompt = negative_prompt or self.default_negative_prompt
         # Set seed if provided

 """
 import torch
+from diffusers import (
+    StableDiffusionPipeline,
+    DPMSolverMultistepScheduler,
+    UNet2DConditionModel,
+    AutoencoderKL,
+    PNDMScheduler
+)
+from transformers import CLIPTextModel, CLIPTokenizer
 from PIL import Image, ImageOps, ImageEnhance, ImageFilter
 from typing import Optional, List, Union
 import os
     def __init__(
         self,
+        model_id: str = "Manojb/stable-diffusion-2-1-base",
+        # model_id: str = "runwayml/stable-diffusion-v1-5",
+        checkpoint_path: Optional[str] = None,
         device: Optional[str] = None,
         use_fp16: bool = True
     ):
         Initialize the Stencil Generator.
         Args:
+            model_id: HuggingFace model ID for Stable Diffusion model (used if checkpoint_path is None)
+            checkpoint_path: Path to fine-tuned checkpoint directory (e.g., "./checkpoint-1000")
+                           If provided, loads fine-tuned model instead of pretrained model
             device: Device to run on ('cuda', 'cpu', or None for auto-detect)
             use_fp16: Whether to use half precision (FP16) for faster inference
         """
         self.model_id = model_id
+        self.checkpoint_path = checkpoint_path
         self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
         self.use_fp16 = use_fp16 and self.device == "cuda"
+        self.is_checkpoint_model = checkpoint_path is not None
         # Apply monkey-patch to fix transformers version compatibility
         _patch_clip_init()
+        # Load model based on whether checkpoint is provided
+        if self.is_checkpoint_model:
+            self._load_from_checkpoint(checkpoint_path)
+        else:
+            self._load_from_pretrained(model_id)
+        print("Model loaded successfully!")
+        # Set prompt decoration based on model type
+        if self.is_checkpoint_model:
+            # Fine-tuned models use simple "sketch of" prefix
+            self.stencil_suffix = "Sketch of"
+            self.default_negative_prompt = None
+        else:
+            # Standard SD 2.1 models use detailed stencil suffix
+            self.stencil_suffix = (
+                "black silhouette, high contrast, simple stencil design, "
+                "centered in frame, complete object visible, isolated subject"
+            )
+            self.default_negative_prompt = (
+                "color, colorful, photograph, realistic, detailed, complex, "
+            )
+    def _load_from_pretrained(self, model_id: str):
+        """
+        Load a pretrained model from HuggingFace.
+        Args:
+            model_id: HuggingFace model ID
+        """
+        print(f"Loading pretrained model {model_id} on {self.device}...")
         # Load the pipeline with version-compatible parameters
         dtype = torch.float16 if self.use_fp16 else torch.float32
             # Uncomment if you have limited VRAM
             # self.pipe.enable_vae_slicing()
+    def _load_from_checkpoint(self, checkpoint_path: str):
+        """
+        Load a fine-tuned model from checkpoint directory or HuggingFace Hub.
+        Args:
+            checkpoint_path: Path to checkpoint directory containing UNet,
+                           or HuggingFace Hub model ID (e.g., "username/model-name")
+        """
+        print(f"Loading fine-tuned checkpoint from {checkpoint_path} on {self.device}...")
+        # Base model for standard components
+        base_model = "runwayml/stable-diffusion-v1-5"
+        print("Loading tokenizer...")
+        tokenizer = CLIPTokenizer.from_pretrained(base_model, subfolder="tokenizer")
+        print("Loading text encoder...")
+        text_encoder = CLIPTextModel.from_pretrained(base_model, subfolder="text_encoder")
+        print("Loading VAE...")
+        vae = AutoencoderKL.from_pretrained(base_model, subfolder="vae")
+        print("Loading scheduler...")
+        scheduler = PNDMScheduler.from_pretrained(base_model, subfolder="scheduler")
+        # Load fine-tuned UNet from checkpoint
+        # Handles both local paths and HuggingFace Hub model IDs
+        if os.path.exists(checkpoint_path):
+            # Local path - append /unet subdirectory
+            unet_path = f"{checkpoint_path}/unet"
+        else:
+            # Assume it's a HuggingFace Hub model ID
+            unet_path = checkpoint_path
+        print(f"Loading fine-tuned UNet from {unet_path}...")
+        unet = UNet2DConditionModel.from_pretrained(unet_path, subfolder="unet" if not os.path.exists(checkpoint_path) else None)
+        # Assemble pipeline
+        print("Assembling pipeline...")
+        self.pipe = StableDiffusionPipeline(
+            vae=vae,
+            text_encoder=text_encoder,
+            tokenizer=tokenizer,
+            unet=unet,
+            scheduler=scheduler,
+            safety_checker=None,
+            feature_extractor=None,
+            requires_safety_checker=False
         )
+        # Move to device with FP16 if enabled
+        if self.device == "cuda":
+            if self.use_fp16:
+                self.pipe.vae = self.pipe.vae.to(self.device, dtype=torch.float16)
+                self.pipe.text_encoder = self.pipe.text_encoder.to(self.device, dtype=torch.float16)
+                self.pipe.unet = self.pipe.unet.to(self.device, dtype=torch.float16)
+            else:
+                self.pipe = self.pipe.to(self.device)
+        else:
+            self.pipe = self.pipe.to(self.device)
     def _clean_stencil_image(
         self,
         image: Image.Image,
         """
+        # Construct full prompt based on model type
         full_prompt = prompt
+        if self.is_checkpoint_model:
+            # For fine-tuned checkpoints, add "sketch of" prefix
+            if add_stencil_suffix and not prompt.lower().startswith("sketch of"):
+                full_prompt = f"sketch of {prompt}"
+        else:
+            # For standard models, use stencil suffix
+            if add_stencil_suffix:
+                full_prompt = f"{prompt}, {self.stencil_suffix}"
+        # Use default negative prompt if none provided (None for checkpoint models)
         full_negative_prompt = negative_prompt or self.default_negative_prompt
         # Set seed if provided

app.py CHANGED Viewed

@@ -11,6 +11,7 @@ from StencilCV import StencilCV
 import torch
 from typing import Optional
 import numpy as np
 MAX_IMAGES = 4
@@ -20,22 +21,48 @@ class StencilApp:
     def __init__(self):
         """Initialize the Stencil Generator."""
         self.generator = None
         self.original_images = []  # Store original images for toggling
         self.outlined_status = []  # Track which images have outline applied
-    def load_model(self):
-        """Lazy load the model when first needed."""
-        if self.generator is None:
-            print("Initializing Stencil Generator...")
             self.generator = StencilGenerator(
                 model_id="stabilityai/stable-diffusion-2-1-base",
                 use_fp16=torch.cuda.is_available()
             )
         return self.generator
     def generate_stencil(
         self,
         prompt: str,
         negative_prompt: Optional[str],
         num_images: int,
         num_inference_steps: int,
@@ -56,8 +83,8 @@ class StencilApp:
             return [], "Please enter a prompt!"
         try:
-            # Load model if not already loaded
-            generator = self.load_model()
             # Generate the image(s)
             images = generator.generate(
@@ -106,8 +133,12 @@ class StencilApp:
         if not gallery_data:
             return gallery_data, "No images to process!"
         if selected_index is None:
-            return gallery_data, "Please select an image first by clicking on it!"
         if selected_index >= len(self.original_images):
             return gallery_data, "Error: Image index out of range!"
@@ -177,10 +208,17 @@ def create_interface():
                     lines=3
                 )
                 num_images = gr.Slider(
                     minimum=1,
                     maximum=MAX_IMAGES,
-                    value=1,
                     step=1,
                     label="Number of Images",
                     info="Generate multiple variations to choose from"
@@ -305,9 +343,10 @@ def create_interface():
                     """
                     ### Tips for Best Results:
                     - Keep prompts simple and descriptive
                     - Generate multiple images to see variations
-                    - The AI automatically adds stencil styling
-                    - Use negative prompts to avoid unwanted features
                     - Try the outline option after generation for different styles
                     - Higher inference steps = better quality (but slower)
                     """
@@ -318,6 +357,7 @@ def create_interface():
             fn=app.generate_stencil,
             inputs=[
                 prompt,
                 negative_prompt,
                 num_images,
                 num_inference_steps,

 import torch
 from typing import Optional
 import numpy as np
+import os
 MAX_IMAGES = 4
     def __init__(self):
         """Initialize the Stencil Generator."""
         self.generator = None
+        self.current_model_type = None
         self.original_images = []  # Store original images for toggling
         self.outlined_status = []  # Track which images have outline applied
+    def load_model(self, model_type: str = "Standard SD 2.1"):
+        """
+        Lazy load the model when first needed or reload if model type changed.
+        Args:
+            model_type: Type of model to load ("Standard SD 2.1", "Checkpoint-500", "Checkpoint-1000")
+        """
+        # Reload if model type changed or first load
+        if self.generator is None or self.current_model_type != model_type:
+            print(f"Initializing Stencil Generator with {model_type}...")
+            # Determine checkpoint path based on model type
+            # Can be local path or HuggingFace Hub model ID
+            checkpoint_path = None
+            if model_type == "Checkpoint-500":
+                # Try local path first, fallback to HuggingFace Hub
+                checkpoint_path = "./Fine-tuning/checkpoint-500"
+                if not os.path.exists(checkpoint_path):
+                    checkpoint_path = "mrpink925/stencilai-checkpoint-500"
+            elif model_type == "Checkpoint-1000":
+                # Try local path first, fallback to HuggingFace Hub
+                checkpoint_path = "./Fine-tuning/checkpoint-1000"
+                if not os.path.exists(checkpoint_path):
+                    checkpoint_path = "mrpink925/stencilai-checkpoint-1000"
             self.generator = StencilGenerator(
                 model_id="stabilityai/stable-diffusion-2-1-base",
+                checkpoint_path=checkpoint_path,
                 use_fp16=torch.cuda.is_available()
             )
+            self.current_model_type = model_type
         return self.generator
     def generate_stencil(
         self,
         prompt: str,
+        model_type: str,
         negative_prompt: Optional[str],
         num_images: int,
         num_inference_steps: int,
             return [], "Please enter a prompt!"
         try:
+            # Load model (will reload if model type changed)
+            generator = self.load_model(model_type)
             # Generate the image(s)
             images = generator.generate(
         if not gallery_data:
             return gallery_data, "No images to process!"
+        # If there's only 1 image and no selection, default to index 0
         if selected_index is None:
+            if len(self.original_images) == 1:
+                selected_index = 0
+            else:
+                return gallery_data, "Please select an image first by clicking on it!"
         if selected_index >= len(self.original_images):
             return gallery_data, "Error: Image index out of range!"
                     lines=3
                 )
+                model_selector = gr.Radio(
+                    choices=["Standard SD 2.1", "Checkpoint-500", "Checkpoint-1000"],
+                    value="Checkpoint-1000",
+                    label="Model Type",
+                    info="Choose between standard model or fine-tuned checkpoints (trained on sketch-style images)"
+                )
                 num_images = gr.Slider(
                     minimum=1,
                     maximum=MAX_IMAGES,
+                    value=2,
                     step=1,
                     label="Number of Images",
                     info="Generate multiple variations to choose from"
                     """
                     ### Tips for Best Results:
                     - Keep prompts simple and descriptive
+                    - **Standard SD 2.1**: Best for general stencils with detailed prompt engineering
+                    - **Checkpoint models**: Fine-tuned for sketch-style stencils (automatically adds "sketch of" prefix)
                     - Generate multiple images to see variations
+                    - Use negative prompts to avoid unwanted features (works best with Standard SD 2.1)
                     - Try the outline option after generation for different styles
                     - Higher inference steps = better quality (but slower)
                     """
             fn=app.generate_stencil,
             inputs=[
                 prompt,
+                model_selector,
                 negative_prompt,
                 num_images,
                 num_inference_steps,

requirements.txt CHANGED Viewed

@@ -3,12 +3,15 @@ diffusers>=0.21.0
 transformers>=4.30.0
 accelerate>=0.20.0
 safetensors>=0.3.0
 gradio>=4.0.0
 numpy>=1.24.0
 Pillow>=9.0.0
 scipy>=1.10.0
 scikit-image>=0.20.0
 opencv-python>=4.8.0
 # Note: Pillow, numpy, scipy, scikit-image required for AI-based post-processing
 # opencv-python required for StencilCV (traditional computer vision approach)

 transformers>=4.30.0
 accelerate>=0.20.0
 safetensors>=0.3.0
+huggingface-hub>=0.16.0
 gradio>=4.0.0
 numpy>=1.24.0
 Pillow>=9.0.0
 scipy>=1.10.0
 scikit-image>=0.20.0
 opencv-python>=4.8.0
+spacy[cuda11x]
+https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl
 # Note: Pillow, numpy, scipy, scikit-image required for AI-based post-processing
 # opencv-python required for StencilCV (traditional computer vision approach)