Spaces:

b2bomber
/

AIAvatarGenerator

Sleeping

App Files Files Community

b2bomber committed on Jul 29, 2025

Commit

14d4d16

verified ·

1 Parent(s): abf7663

Update app.py

Browse files

Files changed (1) hide show

app.py +124 -68

app.py CHANGED Viewed

@@ -2,58 +2,63 @@ import gradio as gr
 import torch
 from diffusers import StableDiffusionPipeline, DDIMScheduler, AutoencoderTiny
 from PIL import Image
-# 1. Force CPU usage
 device = "cpu"
-# 2. Choose a smaller/distilled Stable Diffusion model
-#    'nota-ai/bk-sdm-small' is a good example of a distilled model that's faster.
-#    Another option is 'segmind/SSD-1B' (though still relatively large, it's optimized).
-#    For truly tiny models, you might look for "TinySD" variations.
-#    Let's start with a well-known distilled model for better CPU performance.
-model_id = "nota-ai/bk-sdm-small" # Smaller and faster than SD 2.1
-# model_id = "segmind/SSD-1B" # Another optimized, but still larger, option.
-# Load the pipeline. For CPU, use torch_dtype=torch.float32.
-# Disable safe_serialization if you encounter issues with some older models.
 print(f"Loading model: {model_id} on {device}...")
 try:
     pipe = StableDiffusionPipeline.from_pretrained(
         model_id,
-        torch_dtype=torch.float32, # CPU usually prefers float32 for stability/speed unless specialized kernels are used
-        low_cpu_mem_usage=True # Helps with memory on CPU
-    )
-except Exception as e:
-    print(f"Error loading model {model_id}: {e}. Trying without low_cpu_mem_usage.")
-    pipe = StableDiffusionPipeline.from_pretrained(
-        model_id,
-        torch_dtype=torch.float32,
     )
-# Optimize VAE (Very Important for Speed and Memory on CPU)
-# The VAE (Variational AutoEncoder) is a bottleneck. Using a tiny VAE helps a lot.
-# 'sayakpaul/taesd-diffusers' is a known tiny VAE.
-print("Loading Tiny VAE...")
-try:
-    pipe.vae = AutoencoderTiny.from_pretrained("sayakpaul/taesd-diffusers", torch_dtype=torch.float32)
-except Exception as e:
-    print(f"Could not load Tiny VAE: {e}. Model might be slower.")
-    # Fallback: if Tiny VAE fails, ensure the default VAE is on CPU
-    pipe.vae.to(device)
-# Move pipeline components to CPU explicitly
-pipe.to(device)
-# Set up the scheduler. DDIMScheduler is fine.
-pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
-# Enable CPU offload for even lower memory (can make it slower, but might be necessary for very limited RAM)
-# pipe.enable_sequential_cpu_offload() # Use if you hit OOM errors, but it will be much slower.
-print("Model loaded and configured.")
-# Preset styles (same as before)
 styles = {
     "Pixar": "pixar style portrait of",
     "Anime": "anime style portrait of",
@@ -63,50 +68,101 @@ styles = {
     "Astronaut": "realistic astronaut with helmet, portrait of"
 }
-def generate_avatar(image, style):
-    if image is None:
-        # You might want to generate a default image or throw an error via Gradio
-        # For a more robust app, consider a placeholder image or a clear error message in the UI.
         gr.Warning("Please upload an image to generate an avatar.")
         return None
-    # Although the original intent was image-to-image, your current logic
-    # converts the image input into a text-only prompt.
-    # To truly use the image as input, you would need an img2img pipeline or a specific
-    # controlnet/adapter for Stable Diffusion.
-    # For now, let's keep it as a text-to-image generation based on the style and a generic prompt.
     base_prompt = styles[style]
-    # For CPU, fewer steps and lower guidance scale can yield faster (but potentially lower quality) results.
-    num_inference_steps = 20 # Reduced for speed
-    guidance_scale = 7.0 # Slightly reduced guidance
-    prompt = f"{base_prompt} a person, high quality, detailed, professional" # Enhance prompt
-    negative_prompt = "low resolution, blurry, distorted, bad quality, ugly, cartoon, sketch" # Add negative prompt for better results
-    # Generate image
-    print(f"Generating for style: {style} with prompt: {prompt}")
-    with torch.no_grad(): # Disable gradient calculations for inference
-        generated_image = pipe(
-            prompt=prompt,
-            negative_prompt=negative_prompt,
-            num_inference_steps=num_inference_steps,
-            guidance_scale=guidance_scale
-        ).images[0]
-    return generated_image
 with gr.Blocks() as demo:
     gr.Markdown("## 🎨 Stable Diffusion Avatar Generator with Preset Styles (CPU Optimized)")
-    gr.Markdown("This demo uses a smaller, distilled Stable Diffusion model and is optimized for CPU inference. Generation will still take time on CPU, but should be faster than larger models.")
     with gr.Row():
         with gr.Column():
-            image_input = gr.Image(label="Upload your photo (Note: Image currently used only to trigger generation, not as direct input)", type="pil", sources=["upload", "webcam"])
-            style_selector = gr.Radio(choices=list(styles.keys()), label="Choose a style", value="Anime")
-            generate_btn = gr.Button("Generate Avatar")
         with gr.Column():
             output_image = gr.Image(label="Generated Avatar")
-    generate_btn.click(fn=generate_avatar, inputs=[image_input, style_selector], outputs=output_image)
 demo.launch()

 import torch
 from diffusers import StableDiffusionPipeline, DDIMScheduler, AutoencoderTiny
 from PIL import Image
+import os # For better logging/debugging
+# --- Configuration ---
+# 1. Force CPU usage for compatibility on Spaces without GPU
 device = "cpu"
+# 2. Choose a smaller/distilled Stable Diffusion model for CPU speed
+#    'nota-ai/bk-sdm-small' is a good balance of size/speed/quality for CPU.
+#    If quality is paramount and you can tolerate more time, consider 'runwayml/stable-diffusion-v1-5'
+#    but expect significantly slower generation times on CPU.
+model_id = "nota-ai/bk-sdm-small"
+# 3. Tiny VAE for drastically faster encoding/decoding on CPU
+tiny_vae_id = "sayakpaul/taesd-diffusers"
+# --- Model Loading ---
+# Load the pipeline globally to avoid reloading on each request
 print(f"Loading model: {model_id} on {device}...")
 try:
+    # Use StableDiffusionPipeline for Text-to-Image generation
+    # If you want Image-to-Image, you'd use StableDiffusionImg2ImgPipeline here.
     pipe = StableDiffusionPipeline.from_pretrained(
         model_id,
+        torch_dtype=torch.float32, # CPU usually prefers float32 for stability/speed
+        low_cpu_mem_usage=True,    # Helps with memory on CPU
+        safety_checker=None        # Disable safety checker to save CPU cycles and memory
     )
+    print("Main pipeline loaded.")
+    # Load and assign the Tiny VAE for speed optimization
+    print(f"Loading Tiny VAE from {tiny_vae_id}...")
+    try:
+        pipe.vae = AutoencoderTiny.from_pretrained(tiny_vae_id, torch_dtype=torch.float32)
+        print("Tiny VAE loaded successfully.")
+    except Exception as vae_e:
+        print(f"Warning: Could not load Tiny VAE '{tiny_vae_id}': {vae_e}. Using default VAE (might be slower).")
+        # Ensure default VAE is on CPU
+        pipe.vae.to(device)
+    # Move entire pipeline to CPU explicitly
+    pipe.to(device)
+    # Set up the scheduler. DDIMScheduler is a good choice.
+    pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
+    # Optional: Enable CPU offload if you run into Out-Of-Memory errors on CPU with larger models.
+    # Be aware: This will make generation *much* slower.
+    # pipe.enable_sequential_cpu_offload()
+    print("Model loaded and configured successfully.")
+except Exception as e:
+    print(f"FATAL ERROR: Failed to load models: {e}")
+    # Raise an exception to prevent the app from starting if model loading fails
+    raise RuntimeError(f"Failed to load Stable Diffusion model: {e}")
+# --- Preset Styles ---
 styles = {
     "Pixar": "pixar style portrait of",
     "Anime": "anime style portrait of",
     "Astronaut": "realistic astronaut with helmet, portrait of"
 }
+# --- Generation Function ---
+def generate_avatar(image_input: Image.Image, style: str):
+    """
+    Generates an avatar based on a chosen style using Stable Diffusion.
+    Note: In this text-to-image setup, the uploaded `image_input` is used
+    only to trigger the generation, not to influence the image content directly.
+    """
+    if image_input is None:
         gr.Warning("Please upload an image to generate an avatar.")
         return None
+    # Base prompt from selected style
     base_prompt = styles[style]
+    # Enhance prompt for better quality
+    prompt = f"{base_prompt} a person, highly detailed, professional, studio lighting, volumetric lighting, 4k, cinematic"
+    negative_prompt = "low resolution, blurry, distorted, bad quality, ugly, cartoon, sketch, duplicate, out of frame, bad anatomy, deformed, extra limbs, watermark, text"
+    # Inference parameters (adjusted for speed on CPU, can be tweaked for quality)
+    num_inference_steps = 25 # Increased slightly for better quality, balance with speed
+    guidance_scale = 7.5    # Slightly increased for stronger adherence to prompt
+    print(f"Generating for style: {style} with prompt: '{prompt}' (Steps: {num_inference_steps}, Guidance: {guidance_scale})")
+    try:
+        # Use torch.no_grad() for efficient inference (disables gradient calculations)
+        with torch.no_grad(): # Or torch.inference_mode() for PyTorch >= 1.9
+            generated_image = pipe(
+                prompt=prompt,
+                negative_prompt=negative_prompt,
+                num_inference_steps=num_inference_steps,
+                guidance_scale=guidance_scale,
+                height=512, # Explicitly set output dimensions, can try 768 for SD 2.1 or larger models
+                width=512
+            ).images[0]
+        print("Image generation complete.")
+        return generated_image
+    except Exception as e:
+        print(f"Error during image generation: {e}")
+        gr.Error(f"An error occurred during generation: {e}")
+        return None
+# --- Gradio Interface ---
 with gr.Blocks() as demo:
     gr.Markdown("## 🎨 Stable Diffusion Avatar Generator with Preset Styles (CPU Optimized)")
+    gr.Markdown(
+        "This demo uses a smaller, distilled Stable Diffusion model and is optimized for CPU inference. "
+        "Generation will still take time on CPU compared to GPU (e.g., 20-60 seconds per image depending on CPU and parameters).<br>"
+        "**Note:** The uploaded image is currently used only to trigger generation and is not directly influencing the avatar's appearance. "
+        "It's here for user reference or potential future Image-to-Image features."
+    )
     with gr.Row():
         with gr.Column():
+            # Image input component (type="pil" for Pillow Image object)
+            image_input = gr.Image(
+                label="Upload your photo",
+                type="pil",
+                sources=["upload", "webcam"], # Allow file upload or webcam capture
+                # You might want to set a default for testing: value="path/to/default_image.jpg"
+            )
+            style_selector = gr.Radio(
+                choices=list(styles.keys()),
+                label="Choose a style",
+                value="Anime" # Default selected style
+            )
+            generate_btn = gr.Button("Generate Avatar", variant="primary")
         with gr.Column():
             output_image = gr.Image(label="Generated Avatar")
+    # Connect the button click to the generation function
+    generate_btn.click(
+        fn=generate_avatar,
+        inputs=[image_input, style_selector],
+        outputs=output_image
+    )
+    gr.Examples(
+        examples=[
+            [None, "Pixar"],
+            [None, "Anime"],
+            [None, "Cyberpunk"],
+            [None, "Disney"],
+            [None, "Sketch"],
+            [None, "Astronaut"]
+        ],
+        inputs=[image_input, style_selector],
+        fn=generate_avatar,
+        outputs=output_image,
+        cache_examples=False, # Set to True if examples are pre-computed, False for live generation
+        label="Quick Examples (Generates new images each time)"
+    )
+# Launch the Gradio application
 demo.launch()