Update app.py
app.py CHANGED
@@ -1,11 +1,10 @@
 import gradio as gr
 import torch
+import numpy as np
 from diffusers import StableDiffusionImg2ImgPipeline
 from PIL import Image
-import numpy as np
 from typing import Generator, List
 
-# Set up device and model
 device = "cuda" if torch.cuda.is_available() else "cpu"
 model_id = "nitrosocke/Ghibli-Diffusion"
 
@@ -17,98 +16,129 @@ pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
 pipe = pipe.to(device)
 pipe.enable_attention_slicing()
 
+def resize_and_crop(image: Image.Image, target_size: int = 512) -> Image.Image:
+    """Resize and crop the image to the target size while maintaining aspect ratio."""
+    width, height = image.size
+    if width > height:
+        left = (width - height) // 2
+        right = left + height
+        image = image.crop((left, 0, right, height))
+    elif height > width:
+        top = (height - width) // 2
+        bottom = top + width
+        image = image.crop((0, top, width, bottom))
+    return image.resize((target_size, target_size))
+
 def generate_ghibli_style(
     input_image: Image.Image,
     steps: int = 25,
     strength: float = 0.6,
-    guidance_scale: float = 7.…
-…
-…
-    ""…
-…
-…
-…
-…
-…
-…
+    guidance_scale: float = 7.5
+) -> Generator[Image.Image, None, None]:
+    """Generator that yields intermediate images at each diffusion step."""
+    prompt = "ghibli style, detailed anime portrait, studio ghibli, anime artwork"
+    negative_prompt = "blurry, low quality, sketch, cartoon, 3d, deformed, disfigured"
+
+    # Preprocess image
+    input_image = resize_and_crop(input_image)
+    init_image = input_image.convert("RGB")
+
+    # Prepare latent variables
+    init_image = pipe.image_processor.preprocess(init_image)
+    init_latents = pipe.vae.encode(init_image.to(device)).latent_dist.sample()
+    init_latents = pipe.vae.config.scaling_factor * init_latents
+
+    # Prepare scheduler
+    pipe.scheduler.set_timesteps(steps, device=device)
+    timesteps = pipe.scheduler.timesteps[int(steps * strength):]
+    noise = torch.randn_like(init_latents)
+    latents = pipe.scheduler.add_noise(init_latents, noise, timesteps[:1])
+
+    # Prepare text embeddings
+    text_inputs = pipe.tokenizer(
+        prompt,
+        padding="max_length",
+        max_length=pipe.tokenizer.model_max_length,
+        return_tensors="pt"
+    )
+    text_embeddings = pipe.text_encoder(text_inputs.input_ids.to(device))[0]
+
+    # Unconditional embedding
+    uncond_input = pipe.tokenizer(
+        [negative_prompt] * init_image.shape[0],
+        padding="max_length",
+        max_length=text_embeddings.shape[1],
+        return_tensors="pt"
+    )
+    uncond_embeddings = pipe.text_encoder(uncond_input.input_ids.to(device))[0]
+
+    # Classifier-free guidance
+    text_embeddings = torch.cat([uncond_embeddings, text_embeddings])
+
+    # Diffusion process
+    for i, t in enumerate(gr.Progress().tqdm(timesteps, desc="Generating")):
+        # Expand latents for classifier-free guidance
+        latent_model_input = torch.cat([latents] * 2)
+        latent_model_input = pipe.scheduler.scale_model_input(latent_model_input, t)
+
+        # Predict noise
+        noise_pred = pipe.unet(
+            latent_model_input,
+            t,
+            encoder_hidden_states=text_embeddings
+        ).sample
+
+        # Perform guidance
+        noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
+        noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
 
-…
+        # Compute previous step
+        latents = pipe.scheduler.step(noise_pred, t, latents).prev_sample
+
+        # Decode and yield image
         with torch.no_grad():
-…
-            image = pipe.…
-…
-…
-…
-            # Update progress and yield the current images
-            progress(step / steps, desc="Generating...")
-            yield intermediate_images
-
-    # Run the pipeline
-    with torch.inference_mode():
-        # Create a generator that will yield the images
-        generator = pipe(
-            prompt=prompt,
-            image=input_image,
-            negative_prompt=negative_prompt,
-            strength=strength,
-            guidance_scale=guidance_scale,
-            num_inference_steps=steps,
-            callback=callback,
-            callback_steps=1 # Call after every step
-        )
-
-        # Yield the final result
-        final_image = generator.images[0]
-        intermediate_images.append(final_image)
-        yield intermediate_images
-
-# Custom CSS for better appearance
-css = """
-.gallery {
-    min-height: 500px;
-}
-.gallery img {
-    max-height: 400px;
-    object-fit: contain;
-}
-"""
+            image = pipe.vae.decode(latents / pipe.vae.config.scaling_factor, return_dict=False)[0]
+            image = pipe.image_processor.postprocess(image, output_type="pil")[0]
+
+        yield image
 
 # Gradio interface
-with gr.Blocks(…
-    gr.Markdown("# ✨ Studio Ghibli…
-    gr.Markdown("Upload a photo…
+with gr.Blocks() as demo:
+    gr.Markdown("# ✨ Studio Ghibli Style Transformer ✨")
+    gr.Markdown("Upload a portrait photo to transform it into a Studio Ghibli-style artwork!")
 
     with gr.Row():
         with gr.Column():
-            input_image = gr.Image(label="…
-            steps_slider = gr.Slider(10, 50, value=25,…
-            strength_slider = gr.Slider(0.1, 0.9, value=0.6,…
-            generate_btn = gr.Button("…
+            input_image = gr.Image(label="Input Image", type="pil")
+            steps_slider = gr.Slider(10, 50, value=25, label="Number of Steps")
+            strength_slider = gr.Slider(0.1, 0.9, value=0.6, label="Transformation Strength")
+            generate_btn = gr.Button("✨ Transform!", variant="primary")
 
         with gr.Column():
             gallery = gr.Gallery(
                 label="Generation Progress",
                 show_label=True,
-…
-                preview=True
+                columns=5,
+                preview=True,
+                object_fit="contain",
+                height=600
             )
-
-    # Example images
-    gr.Examples(
-        examples=[
-            ["examples/portrait1.jpg", 25, 0.6],
-            ["examples/portrait2.jpg", 30, 0.5],
-        ],
-        inputs=[input_image, steps_slider, strength_slider],
-        label="Try these examples!"
-    )
-
+
     generate_btn.click(
         fn=generate_ghibli_style,
         inputs=[input_image, steps_slider, strength_slider],
-        outputs=gallery
+        outputs=gallery,
+        concurrency_limit=1
+    )
+
+    gr.Examples(
+        examples=["example1.jpg", "example2.jpg"],
+        inputs=input_image,
+        outputs=gallery,
+        fn=generate_ghibli_style,
+        cache_examples=True
    )
 
-# Launch the app
 if __name__ == "__main__":
-…
+
+    demo.launch()
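A quick usage sketch for the new resize_and_crop helper: it center-crops non-square inputs to a square before the fixed 512x512 resize, so edges are trimmed rather than the image being distorted. (This assumes the function from app.py is in scope; importing app.py directly would also load the diffusion pipeline.)

from PIL import Image

img = Image.new("RGB", (768, 512))   # landscape stand-in
out = resize_and_crop(img)           # crop box is (128, 0, 640, 512), then resize
assert out.size == (512, 512)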
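One note on the scheduler setup: stock diffusers img2img keeps the last int(steps * strength) timesteps (higher strength means more noise added and more denoising), whereas the slice timesteps[int(steps * strength):] in this commit keeps the complement, so raising the strength slider runs fewer denoising steps. A minimal sketch of the library's convention, assuming the documented get_timesteps behavior of StableDiffusionImg2ImgPipeline:

# Pure arithmetic; no model required.
def diffusers_style_timesteps(timesteps, steps, strength):
    init_timestep = min(int(steps * strength), steps)
    t_start = max(steps - init_timestep, 0)
    return timesteps[t_start:]  # keeps the last int(steps * strength) entries

steps, strength = 25, 0.6
timesteps = list(range(999, -1, -40))[:steps]  # stand-in schedule, high noise first
print(len(diffusers_style_timesteps(timesteps, steps, strength)))  # 15
print(len(timesteps[int(steps * strength):]))                      # 10 (this commit)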
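The guidance step inside the loop is plain classifier-free guidance: extrapolate from the unconditional prediction toward the text-conditioned one by the guidance scale. A toy numeric check:

import torch

guidance_scale = 7.5
noise_pred_uncond = torch.tensor([0.10])
noise_pred_text = torch.tensor([0.30])
noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
print(noise_pred)  # tensor([1.6000]), pushed well past the conditional prediction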
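Finally, on wiring the generator into the UI: Gradio treats a yielding event handler as a stream, and each yielded value replaces the output component's value. generate_ghibli_style yields a single PIL image per step into a gr.Gallery, which generally expects a list of images, so yielding a growing list is the usual way to keep every intermediate frame visible. A self-contained sketch of that pattern (all names here are illustrative, not part of this commit):

import gradio as gr
from PIL import Image

def fake_steps(n):
    frames = []
    for i in range(int(n)):
        shade = int(255 * (i + 1) / int(n))
        frames.append(Image.new("RGB", (64, 64), (shade, shade, shade)))
        yield frames  # each yield becomes the Gallery's new value

with gr.Blocks() as demo:
    n = gr.Slider(1, 10, value=5, step=1, label="Steps")
    btn = gr.Button("Run")
    gallery = gr.Gallery(label="Progress")
    btn.click(fn=fake_steps, inputs=n, outputs=gallery)

if __name__ == "__main__":
    demo.launch()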