ghibli

Paused

App Files Files Community

ar08 committed on Apr 6, 2025

Commit

cec94f6

verified ·

1 Parent(s): 5205339

Update app.py

Browse files

Files changed (1) hide show

app.py +48 -13

app.py CHANGED Viewed

@@ -1,27 +1,62 @@
 import gradio as gr
 import torch
-from diffusers import StableDiffusionPipeline
 device = "cuda" if torch.cuda.is_available() else "cpu"
 model_id = "nitrosocke/Ghibli-Diffusion"
-# Load the model once and keep it in memory
-pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16 if device == "cuda" else torch.float32)
 pipe.to(device)
-pipe.enable_attention_slicing()  # Optimize memory usage
-def generate_ghibli_style(image):
     prompt = "ghibli style portrait"
-    with torch.inference_mode():  # Disables gradient calculations for faster inference
-        result = pipe(prompt, image=image, strength=0.6, guidance_scale=6.5, num_inference_steps=25).images[0]  # Reduced steps & optimized scale
-    return result
 iface = gr.Interface(
     fn=generate_ghibli_style,
-    inputs=gr.Image(type="pil"),
-    outputs=gr.Image(),
-    title="Studio Ghibli Portrait Generator",
-    description="Upload a photo to generate a Ghibli-style portrait!"
 )
-iface.launch()

 import gradio as gr
 import torch
+from diffusers import StableDiffusionImg2ImgPipeline
+from PIL import Image
+import numpy as np
 device = "cuda" if torch.cuda.is_available() else "cpu"
 model_id = "nitrosocke/Ghibli-Diffusion"
+# Half precision is used only when running on CUDA; otherwise float32.
+weights_dtype = torch.float32
+if device == "cuda":
+    weights_dtype = torch.float16
+# Build the img2img pipeline once at import time, with the safety checker disabled.
+pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
+    model_id, torch_dtype=weights_dtype, safety_checker=None
+)
 pipe.to(device)
+# Optimize memory usage
+pipe.enable_attention_slicing()
+# Convert an image to a NumPy array of unsigned 8-bit values
+def pil_to_np(image):
+    """Return *image* as a NumPy array cast to ``uint8``."""
+    pixels = np.array(image)
+    return pixels.astype(np.uint8)
+# Generator with step-wise callback
+def generate_ghibli_style(image, steps=25):
+    """Run img2img on *image* and return one preview frame per callback call.
+
+    Args:
+        image: PIL image supplied by the Gradio input, or ``None`` when the
+            form is submitted without an upload.
+        steps: Value forwarded to the pipeline as ``num_inference_steps``.
+
+    Returns:
+        List of PIL images collected by the step callback, in generation
+        order; an empty list when no image was supplied.
+    """
+    # Nothing to transform: hand back an empty gallery instead of failing
+    # somewhere inside the pipeline.
+    if image is None:
+        return []
     prompt = "ghibli style portrait"
+    intermediate_images = []
+    def callback(step: int, timestep: int, latents):
+        # Decode latents to image and store for preview
+        with torch.no_grad():
+            img = pipe.decode_latents(latents)
+            img = pipe.numpy_to_pil(img)[0]
+        intermediate_images.append(img)
+    # NOTE(review): `callback`/`callback_steps` and `decode_latents` appear to be
+    # deprecated in newer diffusers releases in favor of `callback_on_step_end`
+    # — confirm against the pinned diffusers version before upgrading.
+    with torch.inference_mode():
+        pipe(
+            prompt=prompt,
+            image=image,
+            strength=0.6,
+            guidance_scale=6.0,
+            # Coerce in case the UI hands back a float such as 25.0.
+            num_inference_steps=int(steps),
+            callback=callback,
+            callback_steps=1,  # Callback at every step
+        )
+    return intermediate_images
+# Gradio Interface with image gallery preview
 iface = gr.Interface(
     fn=generate_ghibli_style,
+    inputs=[
+        gr.Image(type="pil", label="Upload a photo"),
+        gr.Slider(minimum=10, maximum=50, value=25, step=1, label="Inference Steps"),
+    ],
+    # Gallery layout is set through the constructor: the `.style(grid=...)`
+    # helper was deprecated in Gradio 3.x and removed in Gradio 4.
+    outputs=gr.Gallery(label="Ghibli-style Generation Progress", columns=4),
+    title="✨ Studio Ghibli Portrait Generator ✨",
+    description="Upload a photo and watch it transform into a Ghibli-style portrait step by step!",
 )
+# NOTE(review): `share=True` is presumably unnecessary when hosted on Spaces — confirm.
+iface.launch(share=True)