import spaces import gradio as gr import torch from PIL import Image from transformers import AutoProcessor from longcat_image.models import LongCatImageTransformer2DModel from longcat_image.pipelines import LongCatImageEditPipeline, LongCatImagePipeline import numpy as np # Load models directly at startup device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # Text-to-Image Model t2i_model_id = 'meituan-longcat/LongCat-Image' print(f"🔄 Loading Text-to-Image model from {t2i_model_id}...") t2i_text_processor = AutoProcessor.from_pretrained( t2i_model_id, subfolder='tokenizer' ) t2i_transformer = LongCatImageTransformer2DModel.from_pretrained( t2i_model_id, subfolder='transformer', torch_dtype=torch.bfloat16, use_safetensors=True ).to(device) t2i_pipe = LongCatImagePipeline.from_pretrained( t2i_model_id, transformer=t2i_transformer, text_processor=t2i_text_processor, ) t2i_pipe.to(device, torch.bfloat16) print(f"✅ Text-to-Image model loaded successfully") # Image Edit Model edit_model_id = 'meituan-longcat/LongCat-Image-Edit' print(f"🔄 Loading Image Edit model from {edit_model_id}...") edit_text_processor = AutoProcessor.from_pretrained( edit_model_id, subfolder='tokenizer' ) edit_transformer = LongCatImageTransformer2DModel.from_pretrained( edit_model_id, subfolder='transformer', torch_dtype=torch.bfloat16, use_safetensors=True ).to(device) edit_pipe = LongCatImageEditPipeline.from_pretrained( edit_model_id, transformer=edit_transformer, text_processor=edit_text_processor, ) edit_pipe.to(device, torch.bfloat16) print(f"✅ Image Edit model loaded successfully on {device}") @spaces.GPU(duration=120) def generate_image( prompt: str, negative_prompt: str, width: int, height: int, guidance_scale: float, num_inference_steps: int, seed: int, enable_cfg_renorm: bool, enable_prompt_rewrite: bool, progress=gr.Progress() ): """Generate image from text prompt""" if not prompt or prompt.strip() == "": raise gr.Error("Please enter a prompt") try: progress(0.1, desc="Preparing generation...") progress(0.2, desc="Generating image...") # Set random seed for reproducibility generator = torch.Generator("cuda" if torch.cuda.is_available() else "cpu").manual_seed(seed) # Run the pipeline with torch.inference_mode(): output = t2i_pipe( prompt, negative_prompt=negative_prompt, height=height, width=width, guidance_scale=guidance_scale, num_inference_steps=num_inference_steps, num_images_per_prompt=1, generator=generator, enable_cfg_renorm=enable_cfg_renorm, enable_prompt_rewrite=enable_prompt_rewrite ) progress(1.0, desc="Done!") generated_image = output.images[0] return generated_image except Exception as e: raise gr.Error(f"Error during image generation: {str(e)}") @spaces.GPU(duration=120) def edit_image( input_image: Image.Image, prompt: str, negative_prompt: str, guidance_scale: float, num_inference_steps: int, seed: int, progress=gr.Progress() ): """Edit image based on text prompt""" if input_image is None: raise gr.Error("Please upload an image first") if not prompt or prompt.strip() == "": raise gr.Error("Please enter an edit instruction") try: progress(0.1, desc="Preparing image...") # Convert to RGB if needed if input_image.mode != 'RGB': input_image = input_image.convert('RGB') progress(0.2, desc="Generating edited image...") # Set random seed for reproducibility generator = torch.Generator("cuda" if torch.cuda.is_available() else "cpu").manual_seed(seed) # Run the pipeline with torch.inference_mode(): output = edit_pipe( input_image, prompt, negative_prompt=negative_prompt, guidance_scale=guidance_scale, num_inference_steps=num_inference_steps, num_images_per_prompt=1, generator=generator ) progress(1.0, desc="Done!") edited_image = output.images[0] return edited_image except Exception as e: raise gr.Error(f"Error during image editing: {str(e)}") # Example for image editing edit_example_image_url = "https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo.png" edit_example_data = [ [edit_example_image_url, "Add a mustache", "", 4.5, 50, 42], ] # Examples for text-to-image t2i_example_prompts = [ ["一个年轻的亚裔女性,身穿黄色针织衫,搭配白色项链。她的双手放在膝盖上,表情恬静。背景是一堵粗糙的砖墙,午后的阳光温暖地洒在她身上,营造出一种宁静而温馨的氛围。", "", 1344, 768, 4.5, 50, 43, True, True], ["A serene mountain landscape at sunset with golden clouds", "", 1344, 768, 4.5, 50, 42, True, True], ["A cute robot sitting at a desk, digital art style", "", 1024, 1024, 4.5, 50, 44, True, True], ] # Build Gradio interface with gr.Blocks(fill_height=True) as demo: gr.HTML("""
Generate images from text or edit existing images with AI-powered tools
Built with anycoder
⚡ Powered by Zero-GPU | 🤗 Models: Text-to-Image & Image Edit
⏱️ Note: Zero-GPU provides 120 seconds of GPU time per request. Models are loaded at startup from Hugging Face Hub. Processing typically takes 30-60 seconds depending on settings.
Powered by LongCat Image & LongCat Image Edit | Built with anycoder