import gradio as gr
import torch
from diffusers import StableDiffusionImg2ImgPipeline
from PIL import Image

# --- Model Loading ---
print('Loading Ghibli and Anime style models...')

# Determine the device to run the model on
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f"Using device: {device}")


def _load_img2img_pipeline(model_id: str, cache_dir: str) -> StableDiffusionImg2ImgPipeline:
    """Load a Stable Diffusion img2img pipeline and move it to the active device.

    Uses float16 weights on CUDA (halves VRAM use) and float32 on CPU.

    Note: `torch_dtype` is used here instead of `dtype` due to observed behavior
    in diffusers 0.36.0 where `dtype` was ignored and `torch_dtype` was effective
    despite deprecation warnings.

    Args:
        model_id: Hugging Face model repository id.
        cache_dir: Local directory used to cache the downloaded weights.

    Returns:
        The pipeline, already moved to `device`.
    """
    dtype = torch.float16 if device == 'cuda' else torch.float32
    pipeline = StableDiffusionImg2ImgPipeline.from_pretrained(
        model_id,
        torch_dtype=dtype,
        cache_dir=cache_dir,
    )
    pipeline.to(device)
    return pipeline


# --- Load Ghibli Style Model ---
ghibli_model_id = 'nitrosocke/Ghibli-Diffusion'
ghibli_pipeline = _load_img2img_pipeline(ghibli_model_id, './model_cache_ghibli')
print(f'Ghibli Style Model ({ghibli_model_id}) loaded successfully.')

# --- Load Anime Style Model ---
anime_model_id = 'hakurei/waifu-diffusion'
anime_pipeline = _load_img2img_pipeline(anime_model_id, './model_cache_anime')
print(f'Anime Style Model ({anime_model_id}) loaded successfully.')

print('Both Ghibli and Anime Style Models loaded and moved to device successfully.')

# Per-style generation settings. Keeping these in one table (instead of an
# if/elif chain) makes adding a new style a one-line change.
# NOTE: pipelines are referenced lazily via a key (not stored here) so this
# dict can be defined before/independently of model loading if reordered.
_STYLE_SETTINGS = {
    'Ghibli': {
        'prompt': "Studio Ghibli style, detailed, vibrant colors, fantasy, magical, serene",
        'strength': 0.75,
        'guidance_scale': 7.5,
        'num_inference_steps': 25,  # Reduced for faster processing
    },
    'Anime': {
        'prompt': "anime character, vibrant, digital art, high quality, detailed eyes",
        # Slightly higher strength for a more pronounced anime effect
        'strength': 0.8,
        'guidance_scale': 8.0,
        'num_inference_steps': 25,  # Reduced for faster processing
    },
}

# Maximum dimension for processing — caps memory use and latency while
# preserving the input's aspect ratio.
_MAX_DIM = 768


# --- Transformation Function ---
def cartoon_transform(input_image: Image.Image, style: str) -> Image.Image:
    """
    Applies a cartoon-style transformation to the input image using the loaded
    Stable Diffusion pipelines.

    Args:
        input_image (PIL.Image.Image): The input image to transform.
        style (str): The desired cartoon style ('Ghibli' or 'Anime').

    Returns:
        PIL.Image.Image: The transformed image in the selected cartoon style.

    Raises:
        ValueError: If `style` is not one of the supported styles.
    """
    # Ensure the image is in RGB format (pipelines expect 3-channel input)
    if input_image.mode != 'RGB':
        input_image = input_image.convert('RGB')

    # Downscale large inputs to avoid excessive memory usage and ensure
    # reasonable processing time, while maintaining aspect ratio.
    width, height = input_image.size
    longest_side = max(width, height)
    if longest_side > _MAX_DIM:
        ratio = _MAX_DIM / longest_side
        new_size = (int(width * ratio), int(height * ratio))
        input_image = input_image.resize(new_size, Image.LANCZOS)

    # Look up the per-style settings; reject unknown styles explicitly.
    try:
        settings = _STYLE_SETTINGS[style]
    except KeyError:
        raise ValueError(
            f"Unsupported style: {style}. Choose from 'Ghibli' or 'Anime'."
        ) from None
    pipeline_to_use = ghibli_pipeline if style == 'Ghibli' else anime_pipeline

    # Run the image-to-image pipeline
    transformed_image = pipeline_to_use(
        prompt=settings['prompt'],
        image=input_image,
        strength=settings['strength'],
        guidance_scale=settings['guidance_scale'],
        num_inference_steps=settings['num_inference_steps'],
    ).images[0]

    print(f'Image transformed to {style} style.')
    return transformed_image


# --- Gradio Interface ---

# Available cartoon styles
cartoon_styles = ['Ghibli', 'Anime']

# Create the Gradio interface
iface = gr.Interface(
    fn=cartoon_transform,
    inputs=[
        gr.Image(type='pil', label='Upload your picture'),
        gr.Dropdown(
            choices=cartoon_styles,
            label='Select Cartoon Style',
            value='Ghibli',  # Default selection
        ),
    ],
    outputs=gr.Image(type='pil', label='Transformed Image'),
    title='Cartoon Style Image Transformer',
    description='Upload a picture and transform it into various cartoon styles.',
)

# Launch the Gradio app - this part is typically removed or commented out when
# deploying to Hugging Face Spaces, as Spaces handle the launch automatically.
if __name__ == '__main__':
    print('Launching Gradio interface locally...')
    iface.launch(share=True)  # share=True for Colab, change to False for local dev