pixagram-backup

Runtime error

App Files Files Community

primerz committed on Oct 27

Commit

ff176e8

verified ·

1 Parent(s): c6fda39

Upload 2 files

Browse files

Files changed (2) hide show

app.py +334 -0
requirements.txt +19 -0

app.py ADDED Viewed

	@@ -0,0 +1,334 @@

+import gradio as gr
+import torch
+from diffusers import (
+    StableDiffusionXLPipeline,
+    StableDiffusionXLControlNetPipeline,
+    ControlNetModel,
+    AutoencoderKL,
+    DPMSolverMultistepScheduler
+)
+from diffusers.models.attention_processor import AttnProcessor2_0
+from insightface.app import FaceAnalysis
+from PIL import Image
+import numpy as np
+import cv2
+from transformers import pipeline as transformers_pipeline
+import os
+# Device configuration
+device = "cuda" if torch.cuda.is_available() else "cpu"
+dtype = torch.float16 if device == "cuda" else torch.float32
+print(f"Using device: {device}")
+class RetroArtConverter:
+    def __init__(self):
+        self.device = device
+        self.dtype = dtype
+        # Initialize face analysis for InstantID
+        print("Loading face analysis model...")
+        self.face_app = FaceAnalysis(
+            name='antelopev2',
+            root='./models/insightface',
+            providers=['CUDAExecutionProvider', 'CPUExecutionProvider']
+        )
+        self.face_app.prepare(ctx_id=0, det_size=(640, 640))
+        # Load ControlNet for depth
+        print("Loading ControlNet depth model...")
+        self.controlnet_depth = ControlNetModel.from_pretrained(
+            "diffusers/controlnet-zoe-depth-sdxl-1.0",
+            torch_dtype=self.dtype
+        ).to(self.device)
+        # Load custom VAE
+        print("Loading custom VAE (pixelate)...")
+        vae_path = "./models/vae/pixelate.safetensors"
+        if os.path.exists(vae_path):
+            self.vae = AutoencoderKL.from_single_file(
+                vae_path,
+                torch_dtype=self.dtype
+            ).to(self.device)
+        else:
+            print("Warning: Custom VAE not found, using default SDXL VAE")
+            self.vae = AutoencoderKL.from_pretrained(
+                "madebyollin/sdxl-vae-fp16-fix",
+                torch_dtype=self.dtype
+            ).to(self.device)
+        # Load depth estimator for preprocessing
+        print("Loading depth estimator...")
+        self.depth_estimator = transformers_pipeline(
+            'depth-estimation',
+            model="Intel/dpt-hybrid-midas"
+        )
+        # Load SDXL base model with custom checkpoint
+        print("Loading SDXL model (horizon)...")
+        model_path = "./models/checkpoints/horizon.safetensors"
+        if os.path.exists(model_path):
+            self.pipe = StableDiffusionXLControlNetPipeline.from_single_file(
+                model_path,
+                controlnet=self.controlnet_depth,
+                vae=self.vae,
+                torch_dtype=self.dtype,
+                use_safetensors=True
+            ).to(self.device)
+        else:
+            print("Warning: Custom checkpoint not found, using default SDXL")
+            self.pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
+                "stabilityai/stable-diffusion-xl-base-1.0",
+                controlnet=self.controlnet_depth,
+                vae=self.vae,
+                torch_dtype=self.dtype,
+                use_safetensors=True
+            ).to(self.device)
+        # Load custom LORA
+        print("Loading LORA (retroart)...")
+        lora_path = "./models/lora/retroart.safetensors"
+        if os.path.exists(lora_path):
+            self.pipe.load_lora_weights(lora_path)
+            print("LORA loaded successfully")
+        else:
+            print("Warning: Custom LORA not found")
+        # Optimize pipeline
+        self.pipe.scheduler = DPMSolverMultistepScheduler.from_config(
+            self.pipe.scheduler.config
+        )
+        self.pipe.enable_model_cpu_offload()
+        self.pipe.enable_vae_slicing()
+        # Enable attention slicing for memory efficiency
+        self.pipe.unet.set_attn_processor(AttnProcessor2_0())
+        if hasattr(self.pipe, 'enable_xformers_memory_efficient_attention'):
+            try:
+                self.pipe.enable_xformers_memory_efficient_attention()
+            except Exception as e:
+                print(f"xformers not available: {e}")
+        print("Model initialization complete!")
+    def get_depth_map(self, image):
+        """Generate depth map from input image"""
+        depth = self.depth_estimator(image)
+        depth_image = depth['depth']
+        # Convert to numpy array
+        depth_array = np.array(depth_image)
+        # Normalize to 0-255
+        depth_normalized = (depth_array - depth_array.min()) / (depth_array.max() - depth_array.min()) * 255
+        depth_normalized = depth_normalized.astype(np.uint8)
+        # Convert to 3-channel image
+        depth_colored = cv2.cvtColor(depth_normalized, cv2.COLOR_GRAY2RGB)
+        return Image.fromarray(depth_colored)
+    def detect_faces(self, image):
+        """Detect faces in the image using antelopev2"""
+        img_array = np.array(image)
+        faces = self.face_app.get(img_array)
+        return faces
+    def calculate_target_size(self, original_width, original_height, max_dimension=1024):
+        """Calculate target size maintaining aspect ratio"""
+        aspect_ratio = original_width / original_height
+        if original_width > original_height:
+            new_width = min(original_width, max_dimension)
+            new_height = int(new_width / aspect_ratio)
+        else:
+            new_height = min(original_height, max_dimension)
+            new_width = int(new_height * aspect_ratio)
+        # Round to nearest multiple of 8 (required for diffusion models)
+        new_width = (new_width // 8) * 8
+        new_height = (new_height // 8) * 8
+        return new_width, new_height
+    def generate_retro_art(
+        self,
+        input_image,
+        prompt="retro pixel art game, 16-bit style, vibrant colors",
+        negative_prompt="blurry, low quality, modern, photorealistic, 3d render",
+        num_inference_steps=30,
+        guidance_scale=7.5,
+        controlnet_conditioning_scale=0.8,
+        lora_scale=0.85
+    ):
+        """Main generation function"""
+        # Resize image maintaining aspect ratio
+        original_width, original_height = input_image.size
+        target_width, target_height = self.calculate_target_size(original_width, original_height)
+        print(f"Resizing from {original_width}x{original_height} to {target_width}x{target_height}")
+        resized_image = input_image.resize((target_width, target_height), Image.LANCZOS)
+        # Detect faces
+        faces = self.detect_faces(resized_image)
+        has_faces = len(faces) > 0
+        if has_faces:
+            print(f"Detected {len(faces)} face(s)")
+            # Enhance prompt for face preservation
+            prompt = f"portrait, detailed face, {prompt}"
+        # Generate depth map
+        print("Generating depth map...")
+        depth_image = self.get_depth_map(resized_image)
+        depth_image = depth_image.resize((target_width, target_height), Image.LANCZOS)
+        # Set LORA scale
+        self.pipe.set_adapters(["retroart"], adapter_weights=[lora_scale])
+        # Generate image
+        print("Generating retro art...")
+        result = self.pipe(
+            prompt=prompt,
+            negative_prompt=negative_prompt,
+            image=depth_image,
+            num_inference_steps=num_inference_steps,
+            guidance_scale=guidance_scale,
+            controlnet_conditioning_scale=controlnet_conditioning_scale,
+            width=target_width,
+            height=target_height,
+            generator=torch.Generator(device=self.device).manual_seed(42)
+        )
+        return result.images[0]
+# Initialize the converter once at import time. Constructing it loads every
+# model, and the Gradio callback below reuses this single shared instance.
+print("Initializing RetroArt Converter...")
+converter = RetroArtConverter()
+# Gradio interface
+def process_image(
+    image,
+    prompt,
+    negative_prompt,
+    steps,
+    guidance_scale,
+    controlnet_scale,
+    lora_scale
+):
+    if image is None:
+        return None
+    try:
+        result = converter.generate_retro_art(
+            input_image=image,
+            prompt=prompt,
+            negative_prompt=negative_prompt,
+            num_inference_steps=int(steps),
+            guidance_scale=guidance_scale,
+            controlnet_conditioning_scale=controlnet_scale,
+            lora_scale=lora_scale
+        )
+        return result
+    except Exception as e:
+        print(f"Error: {e}")
+        raise gr.Error(f"Generation failed: {str(e)}")
+# Create Gradio interface
+with gr.Blocks(title="RetroArt Converter") as demo:
+    gr.Markdown("""
+    # 🎮 RetroArt Converter
+    Convert any image into retro game art style!
+    **Features:**
+    - Custom SDXL checkpoint (Horizon)
+    - Pixelate VAE for authentic retro look
+    - RetroArt LORA for style enhancement
+    - Face preservation with InstantID
+    - Depth-aware generation with ControlNet
+    """)
+    with gr.Row():
+        with gr.Column():
+            input_image = gr.Image(label="Input Image", type="pil")
+            prompt = gr.Textbox(
+                label="Prompt",
+                value="retro pixel art game, 16-bit style, vibrant colors, detailed",
+                lines=3
+            )
+            negative_prompt = gr.Textbox(
+                label="Negative Prompt",
+                value="blurry, low quality, modern, photorealistic, 3d render, ugly, distorted",
+                lines=2
+            )
+            with gr.Accordion("Advanced Settings", open=False):
+                steps = gr.Slider(
+                    minimum=20,
+                    maximum=50,
+                    value=30,
+                    step=1,
+                    label="Inference Steps"
+                )
+                guidance_scale = gr.Slider(
+                    minimum=1,
+                    maximum=15,
+                    value=7.5,
+                    step=0.5,
+                    label="Guidance Scale"
+                )
+                controlnet_scale = gr.Slider(
+                    minimum=0,
+                    maximum=2,
+                    value=0.8,
+                    step=0.1,
+                    label="ControlNet Depth Scale"
+                )
+                lora_scale = gr.Slider(
+                    minimum=0,
+                    maximum=2,
+                    value=0.85,
+                    step=0.05,
+                    label="RetroArt LORA Scale"
+                )
+            generate_btn = gr.Button("🎨 Generate Retro Art", variant="primary")
+        with gr.Column():
+            output_image = gr.Image(label="Retro Art Output")
+    gr.Examples(
+        examples=[
+            ["example_portrait.jpg", "retro pixel art portrait, 16-bit game character", "blurry, modern", 30, 7.5, 0.8, 0.85],
+        ],
+        inputs=[input_image, prompt, negative_prompt, steps, guidance_scale, controlnet_scale, lora_scale],
+        outputs=[output_image],
+        fn=process_image,
+        cache_examples=False
+    )
+    generate_btn.click(
+        fn=process_image,
+        inputs=[input_image, prompt, negative_prompt, steps, guidance_scale, controlnet_scale, lora_scale],
+        outputs=[output_image]
+    )
+# Launch with API enabled
+if __name__ == "__main__":
+    demo.queue(max_size=20)
+    demo.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        share=False,
+        show_api=True  # Enable API
+    )

requirements.txt ADDED Viewed

	@@ -0,0 +1,19 @@

+torch==2.1.0
+torchvision==0.16.0
+diffusers==0.25.0
+transformers==4.36.0
+accelerate==0.25.0
+gradio==4.12.0
+pillow==10.1.0
+numpy==1.24.3
+opencv-python==4.8.1.78
+safetensors==0.4.1
+insightface==0.7.3
+onnxruntime-gpu==1.16.3
+onnx==1.15.0
+scikit-image==0.22.0
+scipy==1.11.4
+omegaconf==2.3.0
+einops==0.7.0
+# Each xformers release pins one exact torch version: 0.0.23 requires
+# torch 2.1.1, while 0.0.22.post7 is the build for the torch 2.1.0 pinned above
+xformers==0.0.22.post7
+huggingface-hub==0.20.1