LongCat-Image

Running on Zero

App Files Files

akhaliq HF Staff committed 9 days ago

Commit

d0e03bd

verified ·

1 Parent(s): b8b0baf

Update app.py

Browse files

Files changed (1) hide show

app.py +313 -113

app.py CHANGED Viewed

@@ -4,38 +4,111 @@ import torch
 from PIL import Image
 from transformers import AutoProcessor
 from longcat_image.models import LongCatImageTransformer2DModel
-from longcat_image.pipelines import LongCatImageEditPipeline
 import numpy as np
-# Load model directly at startup
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-model_id = 'meituan-longcat/LongCat-Image-Edit'
-print(f"🔄 Loading model from {model_id}...")
-# Load text processor
-text_processor = AutoProcessor.from_pretrained(
-    model_id,
     subfolder='tokenizer'
 )
-# Load transformer
-transformer = LongCatImageTransformer2DModel.from_pretrained(
-    model_id,
     subfolder='transformer',
     torch_dtype=torch.bfloat16,
     use_safetensors=True
 ).to(device)
-# Load pipeline
-pipe = LongCatImageEditPipeline.from_pretrained(
-    model_id,
-    transformer=transformer,
-    text_processor=text_processor,
 )
-pipe.to(device, torch.bfloat16)
-print(f"✅ Model loaded successfully on {device}")
 @spaces.GPU(duration=120)
 def edit_image(
@@ -69,7 +142,7 @@ def edit_image(
         # Run the pipeline
         with torch.inference_mode():
-            output = pipe(
                 input_image,
                 prompt,
                 negative_prompt=negative_prompt,
@@ -88,124 +161,250 @@ def edit_image(
     except Exception as e:
         raise gr.Error(f"Error during image editing: {str(e)}")
-# Example with image
-example_image_url = "https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo.png"
-example_data = [
-    [example_image_url, "add a mustache", "", 4.5, 50, 42],
 ]
 # Build Gradio interface
 with gr.Blocks(fill_height=True) as demo:
     gr.HTML("""
         <div style="text-align: center; margin-bottom: 20px;">
-            <h1>🎨 LongCat Image Edit</h1>
             <p style="font-size: 16px; color: #666;">
-                Transform your images with AI-powered editing using natural language instructions
             </p>
             <p style="font-size: 14px; margin-top: 10px;">
                 Built with <a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank" style="color: #4A90E2; text-decoration: none;">anycoder</a>
             </p>
             <p style="font-size: 12px; color: #888; margin-top: 5px;">
-                ⚡ Powered by Zero-GPU | 🤗 Model: <a href="https://huggingface.co/meituan-longcat/LongCat-Image-Edit" target="_blank" style="color: #4A90E2;">meituan-longcat/LongCat-Image-Edit</a>
             </p>
         </div>
     """)
-    with gr.Row():
-        with gr.Column(scale=1):
-            gr.Markdown("### 📤 Input")
-            input_image = gr.Image(
-                label="Upload Image",
-                type="pil",
-                sources=["upload", "clipboard"],
-                height=400
-            )
-            prompt = gr.Textbox(
-                label="Edit Instruction",
-                placeholder="Describe how you want to edit the image (e.g., '将猫变成狗' or 'Change the cat to a dog')",
-                lines=3
             )
-            with gr.Accordion("⚙️ Advanced Settings", open=False):
-                negative_prompt = gr.Textbox(
-                    label="Negative Prompt (Optional)",
-                    placeholder="What you don't want in the image",
-                    lines=2
-                )
-                guidance_scale = gr.Slider(
-                    minimum=1.0,
-                    maximum=10.0,
-                    value=4.5,
-                    step=0.5,
-                    label="Guidance Scale",
-                    info="Higher values = stronger adherence to prompt"
-                )
-                num_inference_steps = gr.Slider(
-                    minimum=20,
-                    maximum=100,
-                    value=50,
-                    step=5,
-                    label="Inference Steps",
-                    info="More steps = higher quality but slower"
-                )
-                seed = gr.Slider(
-                    minimum=0,
-                    maximum=999999,
-                    value=42,
-                    step=1,
-                    label="Random Seed",
-                    info="Use same seed for reproducible results"
-                )
-            edit_btn = gr.Button("✨ Edit Image", variant="primary", size="lg")
-            gr.Markdown("""
-            <div style="padding: 10px; background-color: #f0f7ff; border-radius: 8px; margin-top: 10px;">
-                <p style="margin: 0; font-size: 12px; color: #555;">
-                    ⏱️ <strong>Note:</strong> Zero-GPU provides 120 seconds of GPU time per request.
-                    Model is loaded at startup from Hugging Face Hub.
-                    Processing typically takes 30-60 seconds depending on settings.
-                </p>
-            </div>
-            """)
-        with gr.Column(scale=1):
-            gr.Markdown("### 🎯 Output")
-            output_image = gr.Image(
-                label="Edited Image",
-                type="pil",
-                height=400,
-                buttons=["download"]
             )
-            gr.Markdown("### 💡 Tips")
-            gr.Markdown("""
-            - Upload a clear, well-lit image for best results
-            - Be specific in your edit instructions
-            - Supports both English and Chinese prompts
-            - Try different guidance scales for varied results
-            - Higher inference steps = better quality (but slower)
-            - GPU time is limited - optimize your settings for speed
-            - Model loads automatically from Hugging Face Hub
-            """)
-    # Examples section
-    gr.Markdown("### 📝 Example")
-    gr.Examples(
-        examples=example_data,
-        inputs=[input_image, prompt, negative_prompt, guidance_scale, num_inference_steps, seed],
-        outputs=output_image,
-        fn=edit_image,
-        cache_examples=False,
-        label="Click to try this example"
-    )
     # Event handlers
     edit_btn.click(
         fn=edit_image,
         inputs=[
@@ -224,7 +423,8 @@ with gr.Blocks(fill_height=True) as demo:
     gr.HTML("""
         <div style="text-align: center; margin-top: 40px; padding: 20px; border-top: 1px solid #eee;">
             <p style="color: #666; font-size: 14px;">
-                Powered by <a href="https://huggingface.co/meituan-longcat/LongCat-Image-Edit" target="_blank" style="color: #4A90E2;">LongCat Image Edit</a> with Zero-GPU |
                 <a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank" style="color: #4A90E2;">Built with anycoder</a>
             </p>
         </div>

 from PIL import Image
 from transformers import AutoProcessor
 from longcat_image.models import LongCatImageTransformer2DModel
+from longcat_image.pipelines import LongCatImageEditPipeline, LongCatImagePipeline
 import numpy as np
+# Load models directly at startup: these are module-level statements, so both pipelines are built before any handler runs
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')  # CPU fallback when CUDA is unavailable
+# Text-to-Image Model: processor, transformer and pipeline are all loaded from the same repo id
+t2i_model_id = 'meituan-longcat/LongCat-Image'
+print(f"🔄 Loading Text-to-Image model from {t2i_model_id}...")
+t2i_text_processor = AutoProcessor.from_pretrained(
+    t2i_model_id,
     subfolder='tokenizer'
 )
+t2i_transformer = LongCatImageTransformer2DModel.from_pretrained(
+    t2i_model_id,
     subfolder='transformer',
     torch_dtype=torch.bfloat16,
     use_safetensors=True
 ).to(device)
+t2i_pipe = LongCatImagePipeline.from_pretrained(  # reuses the processor and transformer loaded above
+    t2i_model_id,
+    transformer=t2i_transformer,
+    text_processor=t2i_text_processor,
 )
+t2i_pipe.to(device, torch.bfloat16)  # same device and dtype the transformer was loaded with
+print(f"✅ Text-to-Image model loaded successfully")  # f-string has no placeholders; unlike the edit message below, the device is not reported
+# Image Edit Model: same three-step load as above, against the edit repo id
+edit_model_id = 'meituan-longcat/LongCat-Image-Edit'
+print(f"🔄 Loading Image Edit model from {edit_model_id}...")
+edit_text_processor = AutoProcessor.from_pretrained(
+    edit_model_id,
+    subfolder='tokenizer'
+)
+edit_transformer = LongCatImageTransformer2DModel.from_pretrained(
+    edit_model_id,
+    subfolder='transformer',
+    torch_dtype=torch.bfloat16,
+    use_safetensors=True
+).to(device)
+edit_pipe = LongCatImageEditPipeline.from_pretrained(  # reuses the edit processor and transformer loaded above
+    edit_model_id,
+    transformer=edit_transformer,
+    text_processor=edit_text_processor,
+)
+edit_pipe.to(device, torch.bfloat16)  # NOTE(review): both pipelines are now resident on `device` at once — confirm memory headroom
+print(f"✅ Image Edit model loaded successfully on {device}")
+@spaces.GPU(duration=120)  # duration=120 matches the "120 seconds of GPU time per request" note shown in the UI
+def generate_image(
+    prompt: str,  # required; empty or whitespace-only values are rejected below
+    negative_prompt: str,  # forwarded unchanged to the pipeline
+    width: int,  # forwarded as the pipeline's `width`
+    height: int,  # forwarded as the pipeline's `height`
+    guidance_scale: float,  # forwarded unchanged to the pipeline
+    num_inference_steps: int,  # forwarded unchanged to the pipeline
+    seed: int,  # seeds the torch.Generator created below
+    enable_cfg_renorm: bool,  # forwarded unchanged to the pipeline
+    enable_prompt_rewrite: bool,  # forwarded unchanged to the pipeline
+    progress=gr.Progress()  # called three times in the body to report status
+):
+    """Generate image from text prompt"""
+    if not prompt or prompt.strip() == "":  # rejects None, "" and whitespace-only prompts
+        raise gr.Error("Please enter a prompt")  # raised outside the try, so the handler below does not re-wrap it
+    try:
+        progress(0.1, desc="Preparing generation...")
+        progress(0.2, desc="Generating image...")  # back-to-back with the update above; no work happens between them
+        # Set random seed for reproducibility; the generator device uses the same CUDA-availability check as the module-level `device`
+        generator = torch.Generator("cuda" if torch.cuda.is_available() else "cpu").manual_seed(seed)
+        # Run the pipeline: the prompt is positional, every other setting is passed by keyword
+        with torch.inference_mode():
+            output = t2i_pipe(
+                prompt,
+                negative_prompt=negative_prompt,
+                height=height,
+                width=width,
+                guidance_scale=guidance_scale,
+                num_inference_steps=num_inference_steps,
+                num_images_per_prompt=1,  # a single image is requested; only images[0] is read below
+                generator=generator,
+                enable_cfg_renorm=enable_cfg_renorm,
+                enable_prompt_rewrite=enable_prompt_rewrite
+            )
+        progress(1.0, desc="Done!")
+        generated_image = output.images[0]  # NOTE(review): assumes the pipeline output exposes `.images` — confirm against LongCatImagePipeline
+        return generated_image
+    except Exception as e:  # catch-all boundary: every failure inside the try is surfaced as gr.Error
+        raise gr.Error(f"Error during image generation: {str(e)}")  # NOTE(review): no `from e`; the original error is kept only as implicit context
 @spaces.GPU(duration=120)
 def edit_image(
         # Run the pipeline
         with torch.inference_mode():
+            output = edit_pipe(
                 input_image,
                 prompt,
                 negative_prompt=negative_prompt,
     except Exception as e:
         raise gr.Error(f"Error during image editing: {str(e)}")
+# Example for image editing. Column order follows the edit tab's gr.Examples inputs: image, prompt, negative prompt, guidance scale, inference steps, seed
+edit_example_image_url = "https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo.png"
+edit_example_data = [
+    [edit_example_image_url, "Add a mustache", "", 4.5, 50, 42],
+]
+# Examples for text-to-image. Column order follows the text-to-image gr.Examples inputs: prompt, negative prompt, width, height, guidance scale, inference steps, seed, CFG renorm flag, prompt rewrite flag
+t2i_example_prompts = [
+    ["一个年轻的亚裔女性，身穿黄色针织衫，搭配白色项链。她的双手放在膝盖上，表情恬静。背景是一堵粗糙的砖墙，午后的阳光温暖地洒在她身上，营造出一种宁静而温馨的氛围。", "", 1344, 768, 4.5, 50, 43, True, True],
+    ["A serene mountain landscape at sunset with golden clouds", "", 1344, 768, 4.5, 50, 42, True, True],
+    ["A cute robot sitting at a desk, digital art style", "", 1024, 1024, 4.5, 50, 44, True, True],
 ]
 # Build Gradio interface
 with gr.Blocks(fill_height=True) as demo:
     gr.HTML("""
         <div style="text-align: center; margin-bottom: 20px;">
+            <h1>🎨 LongCat Image Studio</h1>
             <p style="font-size: 16px; color: #666;">
+                Generate images from text or edit existing images with AI-powered tools
             </p>
             <p style="font-size: 14px; margin-top: 10px;">
                 Built with <a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank" style="color: #4A90E2; text-decoration: none;">anycoder</a>
             </p>
             <p style="font-size: 12px; color: #888; margin-top: 5px;">
+                ⚡ Powered by Zero-GPU | 🤗 Models:
+                <a href="https://huggingface.co/meituan-longcat/LongCat-Image" target="_blank" style="color: #4A90E2;">Text-to-Image</a> &
+                <a href="https://huggingface.co/meituan-longcat/LongCat-Image-Edit" target="_blank" style="color: #4A90E2;">Image Edit</a>
             </p>
         </div>
     """)
+    with gr.Tabs():
+        # Text-to-Image Tab
+        with gr.TabItem("🖼️ Text to Image"):
+            with gr.Row():
+                with gr.Column(scale=1):
+                    gr.Markdown("### 📝 Prompt")
+                    t2i_prompt = gr.Textbox(
+                        label="Image Description",
+                        placeholder="Describe the image you want to generate (supports English and Chinese)",
+                        lines=5
+                    )
+                    with gr.Accordion("⚙️ Settings", open=True):
+                        t2i_negative_prompt = gr.Textbox(
+                            label="Negative Prompt (Optional)",
+                            placeholder="What you don't want in the image",
+                            lines=2
+                        )
+                        with gr.Row():
+                            t2i_width = gr.Slider(
+                                minimum=512,
+                                maximum=2048,
+                                value=1344,
+                                step=64,
+                                label="Width",
+                            )
+                            t2i_height = gr.Slider(
+                                minimum=512,
+                                maximum=2048,
+                                value=768,
+                                step=64,
+                                label="Height",
+                            )
+                        t2i_guidance_scale = gr.Slider(
+                            minimum=1.0,
+                            maximum=10.0,
+                            value=4.5,
+                            step=0.5,
+                            label="Guidance Scale",
+                            info="Higher values = stronger adherence to prompt"
+                        )
+                        t2i_num_inference_steps = gr.Slider(
+                            minimum=20,
+                            maximum=100,
+                            value=50,
+                            step=5,
+                            label="Inference Steps",
+                            info="More steps = higher quality but slower"
+                        )
+                        t2i_seed = gr.Slider(
+                            minimum=0,
+                            maximum=999999,
+                            value=42,
+                            step=1,
+                            label="Random Seed",
+                        )
+                        t2i_enable_cfg_renorm = gr.Checkbox(
+                            label="Enable CFG Renormalization",
+                            value=True,
+                            info="Improves image quality"
+                        )
+                        t2i_enable_prompt_rewrite = gr.Checkbox(
+                            label="Enable Prompt Rewrite",
+                            value=True,
+                            info="Uses text encoder as built-in prompt enhancer"
+                        )
+                    generate_btn = gr.Button("✨ Generate Image", variant="primary", size="lg")
+                with gr.Column(scale=1):
+                    gr.Markdown("### 🎯 Generated Image")
+                    t2i_output = gr.Image(
+                        label="Output",
+                        type="pil",
+                        height=500,
+                        buttons=["download"]
+                    )
+                    gr.Markdown("### 💡 Tips")
+                    gr.Markdown("""
+                    - Be detailed and specific in your descriptions
+                    - Supports both English and Chinese prompts
+                    - Try different aspect ratios for varied compositions
+                    - Enable prompt rewrite for enhanced descriptions
+                    - Higher inference steps = better quality (but slower)
+                    """)
+            gr.Markdown("### 📝 Example Prompts")
+            gr.Examples(
+                examples=t2i_example_prompts,
+                inputs=[t2i_prompt, t2i_negative_prompt, t2i_width, t2i_height, t2i_guidance_scale, t2i_num_inference_steps, t2i_seed, t2i_enable_cfg_renorm, t2i_enable_prompt_rewrite],
+                outputs=t2i_output,
+                fn=generate_image,
+                cache_examples=False,
+                label="Click to try these examples"
             )
+        # Image Edit Tab
+        with gr.TabItem("✏️ Image Edit"):
+            with gr.Row():
+                with gr.Column(scale=1):
+                    gr.Markdown("### 📤 Input")
+                    input_image = gr.Image(
+                        label="Upload Image",
+                        type="pil",
+                        sources=["upload", "clipboard"],
+                        height=400
+                    )
+                    prompt = gr.Textbox(
+                        label="Edit Instruction",
+                        placeholder="Describe how you want to edit the image",
+                        lines=3
+                    )
+                    with gr.Accordion("⚙️ Advanced Settings", open=False):
+                        negative_prompt = gr.Textbox(
+                            label="Negative Prompt (Optional)",
+                            placeholder="What you don't want in the image",
+                            lines=2
+                        )
+                        guidance_scale = gr.Slider(
+                            minimum=1.0,
+                            maximum=10.0,
+                            value=4.5,
+                            step=0.5,
+                            label="Guidance Scale",
+                            info="Higher values = stronger adherence to prompt"
+                        )
+                        num_inference_steps = gr.Slider(
+                            minimum=20,
+                            maximum=100,
+                            value=50,
+                            step=5,
+                            label="Inference Steps",
+                            info="More steps = higher quality but slower"
+                        )
+                        seed = gr.Slider(
+                            minimum=0,
+                            maximum=999999,
+                            value=42,
+                            step=1,
+                            label="Random Seed",
+                        )
+                    edit_btn = gr.Button("✨ Edit Image", variant="primary", size="lg")
+                with gr.Column(scale=1):
+                    gr.Markdown("### 🎯 Output")
+                    output_image = gr.Image(
+                        label="Edited Image",
+                        type="pil",
+                        height=400,
+                        buttons=["download"]
+                    )
+                    gr.Markdown("### 💡 Tips")
+                    gr.Markdown("""
+                    - Upload a clear, well-lit image for best results
+                    - Be specific in your edit instructions
+                    - Supports both English and Chinese prompts
+                    - Try different guidance scales for varied results
+                    """)
+            gr.Markdown("### 📝 Example")
+            gr.Examples(
+                examples=edit_example_data,
+                inputs=[input_image, prompt, negative_prompt, guidance_scale, num_inference_steps, seed],
+                outputs=output_image,
+                fn=edit_image,
+                cache_examples=False,
+                label="Click to try this example"
             )
+    gr.HTML("""
+        <div style="padding: 10px; background-color: #f0f7ff; border-radius: 8px; margin: 20px 0;">
+            <p style="margin: 0; font-size: 12px; color: #555;">
+                ⏱️ <strong>Note:</strong> Zero-GPU provides 120 seconds of GPU time per request.
+                Models are loaded at startup from Hugging Face Hub.
+                Processing typically takes 30-60 seconds depending on settings.
+            </p>
+        </div>
+    """)
     # Event handlers
+    generate_btn.click(
+        fn=generate_image,
+        inputs=[
+            t2i_prompt,
+            t2i_negative_prompt,
+            t2i_width,
+            t2i_height,
+            t2i_guidance_scale,
+            t2i_num_inference_steps,
+            t2i_seed,
+            t2i_enable_cfg_renorm,
+            t2i_enable_prompt_rewrite
+        ],
+        outputs=t2i_output,
+        api_visibility="public"
+    )
     edit_btn.click(
         fn=edit_image,
         inputs=[
     gr.HTML("""
         <div style="text-align: center; margin-top: 40px; padding: 20px; border-top: 1px solid #eee;">
             <p style="color: #666; font-size: 14px;">
+                Powered by <a href="https://huggingface.co/meituan-longcat/LongCat-Image" target="_blank" style="color: #4A90E2;">LongCat Image</a> &
+                <a href="https://huggingface.co/meituan-longcat/LongCat-Image-Edit" target="_blank" style="color: #4A90E2;">LongCat Image Edit</a> |
                 <a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank" style="color: #4A90E2;">Built with anycoder</a>
             </p>
         </div>