import spaces import gradio as gr import torch from PIL import Image from transformers import AutoProcessor from longcat_image.models import LongCatImageTransformer2DModel from longcat_image.pipelines import LongCatImageEditPipeline, LongCatImagePipeline import numpy as np import random import os import requests import tempfile import shutil from urllib.parse import urlparse MAX_SEED = np.iinfo(np.int32).max MAX_IMAGE_SIZE = 2048 # --- Model Loading --- device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # Text-to-Image Model t2i_model_id = 'meituan-longcat/LongCat-Image' print(f"🔄 Loading Text-to-Image model from {t2i_model_id}...") t2i_text_processor = AutoProcessor.from_pretrained( t2i_model_id, subfolder='tokenizer' ) t2i_transformer = LongCatImageTransformer2DModel.from_pretrained( t2i_model_id, subfolder='transformer', torch_dtype=torch.bfloat16, use_safetensors=True ).to(device) pipe = LongCatImagePipeline.from_pretrained( t2i_model_id, transformer=t2i_transformer, text_processor=t2i_text_processor, ) pipe.to(device, torch.bfloat16) print(f"✅ Text-to-Image model loaded successfully") # Image Edit Model edit_model_id = 'meituan-longcat/LongCat-Image-Edit' print(f"🔄 Loading Image Edit model from {edit_model_id}...") edit_text_processor = AutoProcessor.from_pretrained( edit_model_id, subfolder='tokenizer' ) edit_transformer = LongCatImageTransformer2DModel.from_pretrained( edit_model_id, subfolder='transformer', torch_dtype=torch.bfloat16, use_safetensors=True ).to(device) edit_pipe = LongCatImageEditPipeline.from_pretrained( edit_model_id, transformer=edit_transformer, text_processor=edit_text_processor, ) edit_pipe.to(device, torch.bfloat16) print(f"✅ Image Edit model loaded successfully on {device}") def load_lora_auto(pipe, lora_input): lora_input = lora_input.strip() if not lora_input: return # If it's just an ID like "author/model" if "/" in lora_input and not lora_input.startswith("http"): pipe.load_lora_weights(lora_input) return if lora_input.startswith("http"): url = lora_input # Repo page (no blob/resolve) if "huggingface.co" in url and "/blob/" not in url and "/resolve/" not in url: repo_id = urlparse(url).path.strip("/") pipe.load_lora_weights(repo_id) return # Blob link → convert to resolve link if "/blob/" in url: url = url.replace("/blob/", "/resolve/") # Download direct file tmp_dir = tempfile.mkdtemp() local_path = os.path.join(tmp_dir, os.path.basename(urlparse(url).path)) try: print(f"Downloading LoRA from {url}...") resp = requests.get(url, stream=True) resp.raise_for_status() with open(local_path, "wb") as f: for chunk in resp.iter_content(chunk_size=8192): f.write(chunk) print(f"Saved LoRA to {local_path}") pipe.load_lora_weights(local_path) finally: shutil.rmtree(tmp_dir, ignore_errors=True) @spaces.GPU(duration=120) def infer(prompt, seed=42, randomize_seed=False, width=1024, height=1024, guidance_scale=4, num_inference_steps=28, lora_id=None, lora_scale=0.95, progress=gr.Progress(track_tqdm=True)): if randomize_seed: seed = random.randint(0, MAX_SEED) generator = torch.Generator().manual_seed(seed) if lora_id and lora_id.strip() != "": pipe.unload_lora_weights() load_lora_auto(pipe, lora_id) try: image = pipe( prompt=prompt, negative_prompt="", width=width, height=height, num_inference_steps=num_inference_steps, generator=generator, guidance_scale=guidance_scale ).images[0] print("Image Generation Completed for: ", prompt, lora_id) return image, seed finally: # Unload LoRA weights if they were loaded if lora_id: pipe.unload_lora_weights() @spaces.GPU(duration=120) def edit_image( input_image: Image.Image, prompt: str, seed: int, progress=gr.Progress() ): """Edit image based on text prompt""" if input_image is None: raise gr.Error("Please upload an image first") if not prompt or prompt.strip() == "": raise gr.Error("Please enter an edit instruction") try: progress(0.1, desc="Preparing image...") if input_image.mode != 'RGB': input_image = input_image.convert('RGB') progress(0.2, desc="Generating edited image...") generator = torch.Generator("cuda" if torch.cuda.is_available() else "cpu").manual_seed(seed) with torch.inference_mode(): output = edit_pipe( input_image, prompt, negative_prompt="", guidance_scale=4.5, num_inference_steps=50, num_images_per_prompt=1, generator=generator ) progress(1.0, desc="Done!") return output.images[0] except Exception as e: raise gr.Error(f"Error during image editing: {str(e)}") examples = [ "a tiny astronaut hatching from an egg on the moon", "a cat holding a sign that says hello world", "an anime illustration of a wiener schnitzel", ] css = """ #col-container { margin: 0 auto; max-width: 960px; } .generate-btn { background: linear-gradient(90deg, #4B79A1 0%, #283E51 100%) !important; border: none !important; color: white !important; } .generate-btn:hover { transform: translateY(-2px); box-shadow: 0 5px 15px rgba(0,0,0,0.2); } """ with gr.Blocks(css=css) as app: gr.HTML("