"""Gradio app that generates character views from a reference face image."""

import os

import gradio as gr
import spaces
import torch
from PIL import Image
from diffusers import StableDiffusionXLPipeline, ControlNetModel, AutoencoderKL
from transformers import CLIPVisionModelWithProjection
import numpy as np

# Pre-defined head rotation angles (pitch, yaw in degrees).
# Only the keys are consumed in this file (as dropdown choices).
VIEW_ANGLES = {
    "front": (0, 0),
    "3/4-left": (0, -35),
    "3/4-right": (0, 35),
    "profile-left": (0, -90),
    "profile-right": (0, 90),
}

# Prompt fragment appended for each supported view angle.
_ANGLE_PROMPTS = {
    "front": "front view, facing camera, symmetrical",
    "3/4-left": "3/4 view, turned slightly left, three quarter view",
    "3/4-right": "3/4 view, turned slightly right, three quarter view",
    "profile-left": "side profile, facing left, profile view",
    "profile-right": "side profile, facing right, profile view",
}

_STYLE_SUFFIX = "portrait, character art, consistent character"
_DEFAULT_NEGATIVE = (
    "deformed, ugly, bad anatomy, blurry, low quality, worst quality, "
    "text, watermark"
)

# Global pipeline (loaded once, on first use).
pipe = None
# Not referenced anywhere else in this file; kept for backward compatibility.
ip_adapter = None


def load_pipeline():
    """Return the shared SDXL + IP-Adapter pipeline, creating it on first use.

    The pipeline is only published to the module-level ``pipe`` after it has
    been fully configured, so a failure part-way through loading does not
    leave a half-initialised pipeline cached for later calls.

    Returns:
        The configured ``StableDiffusionXLPipeline`` instance.
    """
    global pipe
    if pipe is not None:
        return pipe

    # Base model: stock SDXL 1.0 in half precision.
    model_id = "stabilityai/stable-diffusion-xl-base-1.0"
    new_pipe = StableDiffusionXLPipeline.from_pretrained(
        model_id,
        torch_dtype=torch.float16,
        variant="fp16",
    )

    # Load IP-Adapter FaceID Plus weights.
    # NOTE(review): the diffusers IP-Adapter guide loads FaceID checkpoints
    # with image_encoder_folder=None and feeds insightface embeddings through
    # `ip_adapter_image_embeds`. Confirm that this configuration (and the
    # `ip_adapter_image` call in generate_view) works with this checkpoint.
    new_pipe.load_ip_adapter(
        "h94/IP-Adapter-FaceID",
        subfolder=None,
        weight_name="ip-adapter-faceid-plusv2_sdxl.bin",
        image_encoder_folder="models/image_encoder",
    )
    # Default scale; generate_view overrides it per request.
    new_pipe.set_ip_adapter_scale(0.7)
    new_pipe.enable_model_cpu_offload()

    pipe = new_pipe
    return pipe


@spaces.GPU
def generate_view(
    face_image: Image.Image,
    view_angle: str,
    prompt: str,
    negative_prompt: str = "",
    strength: float = 0.8,
    guidance_scale: float = 5.0,
    num_steps: int = 30,
    seed: int = 42,
) -> Image.Image:
    """Generate a character view from a reference face.

    Args:
        face_image: Reference face image (front view ideally).
        view_angle: One of 'front', '3/4-left', '3/4-right',
            'profile-left', 'profile-right'. Unknown values add no
            view-specific text to the prompt.
        prompt: Description of the character/style.
        negative_prompt: What to avoid; prepended to a built-in default
            negative prompt.
        strength: IP-Adapter scale (0.0-1.0) applied for this call.
        guidance_scale: CFG scale.
        num_steps: Number of inference steps.
        seed: Random seed for reproducibility.

    Returns:
        Generated 1024x1024 image at the specified angle.

    Raises:
        gr.Error: If no reference face image was provided.
    """
    if face_image is None:
        # Surface a readable message in the UI instead of an opaque failure
        # from inside the diffusion pipeline.
        raise gr.Error("Please provide a reference face image.")

    pipeline = load_pipeline()

    # Apply the requested identity strength; previously the slider value was
    # accepted but never passed to the pipeline.
    pipeline.set_ip_adapter_scale(float(strength))

    # Build the prompt from its non-empty parts so an empty user prompt or an
    # unknown view angle does not leave stray ", ," separators.
    view_prompt = _ANGLE_PROMPTS.get(view_angle, "")
    full_prompt = ", ".join(
        part for part in (prompt, view_prompt, _STYLE_SUFFIX) if part
    )

    if negative_prompt:
        full_negative = f"{negative_prompt}, {_DEFAULT_NEGATIVE}"
    else:
        full_negative = _DEFAULT_NEGATIVE

    # UI/API values may arrive as floats; manual_seed and the step count
    # need integers.
    generator = torch.Generator(device="cpu").manual_seed(int(seed))

    # Generate with IP-Adapter face reference.
    result = pipeline(
        prompt=full_prompt,
        negative_prompt=full_negative,
        ip_adapter_image=face_image,
        num_inference_steps=int(num_steps),
        guidance_scale=guidance_scale,
        generator=generator,
        width=1024,
        height=1024,
    )
    return result.images[0]


# Gradio Interface
with gr.Blocks(title="Character View Generator") as demo:
    gr.Markdown("""
    # Character View Generator

    Generate consistent character views from a reference face image.
    Designed for character sheet pipelines.
    """)

    with gr.Row():
        with gr.Column():
            face_input = gr.Image(label="Reference Face", type="pil")
            view_dropdown = gr.Dropdown(
                choices=list(VIEW_ANGLES.keys()),
                value="front",
                label="Target View Angle",
            )
            prompt_input = gr.Textbox(
                label="Prompt",
                placeholder="anime character, blue skin, orange eyes...",
                lines=2,
            )
            negative_input = gr.Textbox(
                label="Negative Prompt (optional)",
                placeholder="realistic, photo...",
                lines=2,
            )
            with gr.Row():
                strength_slider = gr.Slider(
                    0.0, 1.0, value=0.8, label="Identity Strength"
                )
                guidance_slider = gr.Slider(
                    1.0, 20.0, value=5.0, label="Guidance Scale"
                )
            with gr.Row():
                steps_slider = gr.Slider(
                    10, 50, value=30, step=1, label="Steps"
                )
                seed_input = gr.Number(value=42, label="Seed", precision=0)
            generate_btn = gr.Button("Generate View", variant="primary")

        with gr.Column():
            output_image = gr.Image(label="Generated View")

    generate_btn.click(
        fn=generate_view,
        inputs=[
            face_input,
            view_dropdown,
            prompt_input,
            negative_input,
            strength_slider,
            guidance_slider,
            steps_slider,
            seed_input,
        ],
        outputs=output_image,
        api_name="generate_view",  # Enables API access
    )


if __name__ == "__main__":
    demo.queue(api_open=True)
    demo.launch()