# Spaces: Running on Zero
| import spaces | |
| import gradio as gr | |
| import torch | |
| from PIL import Image | |
| from transformers import AutoProcessor | |
| from longcat_image.models import LongCatImageTransformer2DModel | |
| from longcat_image.pipelines import LongCatImageEditPipeline, LongCatImagePipeline | |
| import numpy as np | |
# Load models directly at startup so every request reuses the same pipelines.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


def _load_pipeline(model_id, pipeline_cls):
    """Load the processor, transformer and pipeline stored under ``model_id``.

    Returns a ``(text_processor, transformer, pipeline)`` tuple; the
    transformer and the pipeline are moved to the module-level ``device``.
    """
    text_processor = AutoProcessor.from_pretrained(
        model_id,
        subfolder='tokenizer',
    )
    transformer = LongCatImageTransformer2DModel.from_pretrained(
        model_id,
        subfolder='transformer',
        torch_dtype=torch.bfloat16,
        use_safetensors=True,
    ).to(device)
    pipeline = pipeline_cls.from_pretrained(
        model_id,
        transformer=transformer,
        text_processor=text_processor,
    )
    pipeline.to(device, torch.bfloat16)
    return text_processor, transformer, pipeline


# Text-to-Image Model
t2i_model_id = 'meituan-longcat/LongCat-Image'
print(f"🔄 Loading Text-to-Image model from {t2i_model_id}...")
t2i_text_processor, t2i_transformer, t2i_pipe = _load_pipeline(
    t2i_model_id, LongCatImagePipeline
)
print("✅ Text-to-Image model loaded successfully")

# Image Edit Model
edit_model_id = 'meituan-longcat/LongCat-Image-Edit'
print(f"🔄 Loading Image Edit model from {edit_model_id}...")
edit_text_processor, edit_transformer, edit_pipe = _load_pipeline(
    edit_model_id, LongCatImageEditPipeline
)
print(f"✅ Image Edit model loaded successfully on {device}")
# ZeroGPU only attaches a GPU inside functions wrapped by ``spaces.GPU``;
# 120 s matches the per-request budget advertised in the UI note.
@spaces.GPU(duration=120)
def generate_image(
    prompt: str,
    negative_prompt: str,
    width: int,
    height: int,
    guidance_scale: float,
    num_inference_steps: int,
    seed: int,
    enable_cfg_renorm: bool,
    enable_prompt_rewrite: bool,
    progress=gr.Progress()
):
    """Generate one image from a text prompt with the text-to-image pipeline.

    Args:
        prompt: Description of the image; must not be empty or whitespace.
        negative_prompt: Text forwarded to the pipeline as ``negative_prompt``.
        width: Output width, forwarded to the pipeline.
        height: Output height, forwarded to the pipeline.
        guidance_scale: Forwarded to the pipeline as ``guidance_scale``.
        num_inference_steps: Forwarded to the pipeline.
        seed: Seed for the ``torch.Generator`` handed to the pipeline.
        enable_cfg_renorm: Forwarded to the pipeline unchanged.
        enable_prompt_rewrite: Forwarded to the pipeline unchanged.
        progress: Gradio progress tracker used to report coarse status.

    Returns:
        The first image in the pipeline output (``output.images[0]``).

    Raises:
        gr.Error: If the prompt is empty, or if anything fails while
            generating (the original exception is chained as the cause).
    """
    if not prompt or not prompt.strip():
        raise gr.Error("Please enter a prompt")

    try:
        progress(0.1, desc="Preparing generation...")
        progress(0.2, desc="Generating image...")

        # Set random seed for reproducibility. ``manual_seed`` only accepts
        # an int, so coerce in case a client sends a whole number as a float.
        generator = torch.Generator(
            "cuda" if torch.cuda.is_available() else "cpu"
        ).manual_seed(int(seed))

        # Run the pipeline without autograd bookkeeping.
        with torch.inference_mode():
            output = t2i_pipe(
                prompt,
                negative_prompt=negative_prompt,
                height=int(height),
                width=int(width),
                guidance_scale=guidance_scale,
                num_inference_steps=int(num_inference_steps),
                num_images_per_prompt=1,
                generator=generator,
                enable_cfg_renorm=enable_cfg_renorm,
                enable_prompt_rewrite=enable_prompt_rewrite
            )

        progress(1.0, desc="Done!")
        return output.images[0]
    except Exception as e:
        # Surface the failure in the UI while keeping the original traceback.
        raise gr.Error(f"Error during image generation: {str(e)}") from e
# ZeroGPU only attaches a GPU inside functions wrapped by ``spaces.GPU``;
# 120 s matches the per-request budget advertised in the UI note.
@spaces.GPU(duration=120)
def edit_image(
    input_image: Image.Image,
    prompt: str,
    negative_prompt: str,
    guidance_scale: float,
    num_inference_steps: int,
    seed: int,
    progress=gr.Progress()
):
    """Edit an uploaded image according to a text instruction.

    Args:
        input_image: Image to edit; converted to RGB when in another mode.
        prompt: Edit instruction; must not be empty or whitespace.
        negative_prompt: Text forwarded to the pipeline as ``negative_prompt``.
        guidance_scale: Forwarded to the pipeline as ``guidance_scale``.
        num_inference_steps: Forwarded to the pipeline.
        seed: Seed for the ``torch.Generator`` handed to the pipeline.
        progress: Gradio progress tracker used to report coarse status.

    Returns:
        The first image in the pipeline output (``output.images[0]``).

    Raises:
        gr.Error: If no image or no instruction was supplied, or if anything
            fails while editing (the original exception is chained).
    """
    if input_image is None:
        raise gr.Error("Please upload an image first")
    if not prompt or not prompt.strip():
        raise gr.Error("Please enter an edit instruction")

    try:
        progress(0.1, desc="Preparing image...")

        # Convert to RGB if needed
        if input_image.mode != 'RGB':
            input_image = input_image.convert('RGB')

        progress(0.2, desc="Generating edited image...")

        # Set random seed for reproducibility. ``manual_seed`` only accepts
        # an int, so coerce in case a client sends a whole number as a float.
        generator = torch.Generator(
            "cuda" if torch.cuda.is_available() else "cpu"
        ).manual_seed(int(seed))

        # Run the pipeline without autograd bookkeeping.
        with torch.inference_mode():
            output = edit_pipe(
                input_image,
                prompt,
                negative_prompt=negative_prompt,
                guidance_scale=guidance_scale,
                num_inference_steps=int(num_inference_steps),
                num_images_per_prompt=1,
                generator=generator
            )

        progress(1.0, desc="Done!")
        return output.images[0]
    except Exception as e:
        # Surface the failure in the UI while keeping the original traceback.
        raise gr.Error(f"Error during image editing: {str(e)}") from e
# Example for image editing. Column order matches the edit tab inputs:
# image, instruction, negative prompt, guidance scale, steps, seed.
edit_example_image_url = "https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo.png"
edit_example_data = [
    [edit_example_image_url, "Add a mustache", "", 4.5, 50, 42],
]

# Examples for text-to-image. Only prompt, size and seed vary per example;
# the remaining columns (empty negative prompt, guidance 4.5, 50 steps, both
# checkboxes on) are shared and filled in below.
_t2i_example_specs = (
    ("一个年轻的亚裔女性,身穿黄色针织衫,搭配白色项链。她的双手放在膝盖上,表情恬静。背景是一堵粗糙的砖墙,午后的阳光温暖地洒在她身上,营造出一种宁静而温馨的氛围。", 1344, 768, 43),
    ("A serene mountain landscape at sunset with golden clouds", 1344, 768, 42),
    ("A cute robot sitting at a desk, digital art style", 1024, 1024, 44),
)
t2i_example_prompts = [
    [example_prompt, "", example_width, example_height, 4.5, 50, example_seed, True, True]
    for example_prompt, example_width, example_height, example_seed in _t2i_example_specs
]
# Build Gradio interface: a header, two tabs (text-to-image and image edit),
# a usage note, the button event wiring, and a footer. Components render in
# the order they are created, so statement order here is the page layout.
with gr.Blocks(fill_height=True) as demo:
    # Page header with links to both model cards.
    gr.HTML("""
    <div style="text-align: center; margin-bottom: 20px;">
    <h1>🎨 LongCat Image Studio</h1>
    <p style="font-size: 16px; color: #666;">
    Generate images from text or edit existing images with AI-powered tools
    </p>
    <p style="font-size: 14px; margin-top: 10px;">
    Built with <a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank" style="color: #4A90E2; text-decoration: none;">anycoder</a>
    </p>
    <p style="font-size: 12px; color: #888; margin-top: 5px;">
    ⚡ Powered by Zero-GPU | 🤗 Models:
    <a href="https://huggingface.co/meituan-longcat/LongCat-Image" target="_blank" style="color: #4A90E2;">Text-to-Image</a> &
    <a href="https://huggingface.co/meituan-longcat/LongCat-Image-Edit" target="_blank" style="color: #4A90E2;">Image Edit</a>
    </p>
    </div>
    """)
    with gr.Tabs():
        # Text-to-Image Tab
        with gr.TabItem("🖼️ Text to Image"):
            with gr.Row():
                # Left column: prompt, generation settings, and the trigger button.
                with gr.Column(scale=1):
                    gr.Markdown("### 📝 Prompt")
                    t2i_prompt = gr.Textbox(
                        label="Image Description",
                        placeholder="Describe the image you want to generate (supports English and Chinese)",
                        lines=5
                    )
                    with gr.Accordion("⚙️ Settings", open=True):
                        t2i_negative_prompt = gr.Textbox(
                            label="Negative Prompt (Optional)",
                            placeholder="What you don't want in the image",
                            lines=2
                        )
                        # Width and height share one row; both move in steps of 64.
                        with gr.Row():
                            t2i_width = gr.Slider(
                                minimum=512,
                                maximum=2048,
                                value=1344,
                                step=64,
                                label="Width",
                            )
                            t2i_height = gr.Slider(
                                minimum=512,
                                maximum=2048,
                                value=768,
                                step=64,
                                label="Height",
                            )
                        t2i_guidance_scale = gr.Slider(
                            minimum=1.0,
                            maximum=10.0,
                            value=4.5,
                            step=0.5,
                            label="Guidance Scale",
                            info="Higher values = stronger adherence to prompt"
                        )
                        t2i_num_inference_steps = gr.Slider(
                            minimum=20,
                            maximum=100,
                            value=50,
                            step=5,
                            label="Inference Steps",
                            info="More steps = higher quality but slower"
                        )
                        t2i_seed = gr.Slider(
                            minimum=0,
                            maximum=999999,
                            value=42,
                            step=1,
                            label="Random Seed",
                        )
                        t2i_enable_cfg_renorm = gr.Checkbox(
                            label="Enable CFG Renormalization",
                            value=True,
                            info="Improves image quality"
                        )
                        t2i_enable_prompt_rewrite = gr.Checkbox(
                            label="Enable Prompt Rewrite",
                            value=True,
                            info="Uses text encoder as built-in prompt enhancer"
                        )
                    generate_btn = gr.Button("✨ Generate Image", variant="primary", size="lg")
                # Right column: generated image and usage tips.
                with gr.Column(scale=1):
                    gr.Markdown("### 🎯 Generated Image")
                    t2i_output = gr.Image(
                        label="Output",
                        type="pil",
                        height=500,
                        buttons=["download"]
                    )
                    gr.Markdown("### 💡 Tips")
                    gr.Markdown("""
                    - Be detailed and specific in your descriptions
                    - Supports both English and Chinese prompts
                    - Try different aspect ratios for varied compositions
                    - Enable prompt rewrite for enhanced descriptions
                    - Higher inference steps = better quality (but slower)
                    """)
            gr.Markdown("### 📝 Example Prompts")
            # The inputs list must stay in the same order as the columns of
            # each row in t2i_example_prompts (and as generate_image's
            # parameters). cache_examples=False: outputs are not precomputed.
            gr.Examples(
                examples=t2i_example_prompts,
                inputs=[t2i_prompt, t2i_negative_prompt, t2i_width, t2i_height, t2i_guidance_scale, t2i_num_inference_steps, t2i_seed, t2i_enable_cfg_renorm, t2i_enable_prompt_rewrite],
                outputs=t2i_output,
                fn=generate_image,
                cache_examples=False,
                label="Click to try these examples"
            )
        # Image Edit Tab
        with gr.TabItem("✏️ Image Edit"):
            with gr.Row():
                # Left column: source image, instruction, and advanced settings.
                with gr.Column(scale=1):
                    gr.Markdown("### 📤 Input")
                    input_image = gr.Image(
                        label="Upload Image",
                        type="pil",
                        sources=["upload", "clipboard"],
                        height=400
                    )
                    prompt = gr.Textbox(
                        label="Edit Instruction",
                        placeholder="Describe how you want to edit the image",
                        lines=3
                    )
                    with gr.Accordion("⚙️ Advanced Settings", open=False):
                        negative_prompt = gr.Textbox(
                            label="Negative Prompt (Optional)",
                            placeholder="What you don't want in the image",
                            lines=2
                        )
                        guidance_scale = gr.Slider(
                            minimum=1.0,
                            maximum=10.0,
                            value=4.5,
                            step=0.5,
                            label="Guidance Scale",
                            info="Higher values = stronger adherence to prompt"
                        )
                        num_inference_steps = gr.Slider(
                            minimum=20,
                            maximum=100,
                            value=50,
                            step=5,
                            label="Inference Steps",
                            info="More steps = higher quality but slower"
                        )
                        seed = gr.Slider(
                            minimum=0,
                            maximum=999999,
                            value=42,
                            step=1,
                            label="Random Seed",
                        )
                    edit_btn = gr.Button("✨ Edit Image", variant="primary", size="lg")
                # Right column: edited image and usage tips.
                with gr.Column(scale=1):
                    gr.Markdown("### 🎯 Output")
                    output_image = gr.Image(
                        label="Edited Image",
                        type="pil",
                        height=400,
                        buttons=["download"]
                    )
                    gr.Markdown("### 💡 Tips")
                    gr.Markdown("""
                    - Upload a clear, well-lit image for best results
                    - Be specific in your edit instructions
                    - Supports both English and Chinese prompts
                    - Try different guidance scales for varied results
                    """)
            gr.Markdown("### 📝 Example")
            # The inputs list must stay in the same order as the columns of
            # each row in edit_example_data (and as edit_image's parameters).
            gr.Examples(
                examples=edit_example_data,
                inputs=[input_image, prompt, negative_prompt, guidance_scale, num_inference_steps, seed],
                outputs=output_image,
                fn=edit_image,
                cache_examples=False,
                label="Click to try this example"
            )
    # Usage note shown below the tabs.
    gr.HTML("""
    <div style="padding: 10px; background-color: #f0f7ff; border-radius: 8px; margin: 20px 0;">
    <p style="margin: 0; font-size: 12px; color: #555;">
    ⏱️ <strong>Note:</strong> Zero-GPU provides 120 seconds of GPU time per request.
    Models are loaded at startup from Hugging Face Hub.
    Processing typically takes 30-60 seconds depending on settings.
    </p>
    </div>
    """)
    # Event handlers: each input list is passed positionally to the handler,
    # so its order must match the handler's parameter order.
    generate_btn.click(
        fn=generate_image,
        inputs=[
            t2i_prompt,
            t2i_negative_prompt,
            t2i_width,
            t2i_height,
            t2i_guidance_scale,
            t2i_num_inference_steps,
            t2i_seed,
            t2i_enable_cfg_renorm,
            t2i_enable_prompt_rewrite
        ],
        outputs=t2i_output,
        api_visibility="public"
    )
    edit_btn.click(
        fn=edit_image,
        inputs=[
            input_image,
            prompt,
            negative_prompt,
            guidance_scale,
            num_inference_steps,
            seed
        ],
        outputs=output_image,
        api_visibility="public"
    )
    # Footer
    gr.HTML("""
    <div style="text-align: center; margin-top: 40px; padding: 20px; border-top: 1px solid #eee;">
    <p style="color: #666; font-size: 14px;">
    Powered by <a href="https://huggingface.co/meituan-longcat/LongCat-Image" target="_blank" style="color: #4A90E2;">LongCat Image</a> &
    <a href="https://huggingface.co/meituan-longcat/LongCat-Image-Edit" target="_blank" style="color: #4A90E2;">LongCat Image Edit</a> |
    <a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank" style="color: #4A90E2;">Built with anycoder</a>
    </p>
    </div>
    """)
# Launch the app (only when executed as a script, not when imported).
if __name__ == "__main__":
    # Visual theme handed to launch().
    soft_theme = gr.themes.Soft(
        primary_hue="blue",
        secondary_hue="indigo",
        neutral_hue="slate",
        font=gr.themes.GoogleFont("Inter"),
        text_size="lg",
        spacing_size="lg",
        radius_size="md",
    )
    # Single footer entry pointing at the anycoder Space.
    anycoder_link = {
        "label": "Built with anycoder",
        "url": "https://huggingface.co/spaces/akhaliq/anycoder",
    }
    demo.launch(
        theme=soft_theme,
        footer_links=[anycoder_link],
        mcp_server=True,
    )