Spaces:
Running
on
Zero
Running
on
Zero
File size: 9,059 Bytes
99582fb de41cc1 51779c5 de41cc1 36abdd4 de41cc1 d2176ba 40c2e73 51779c5 85df522 de41cc1 d0e03bd be5446b 85df522 d0e03bd be5446b 85df522 be5446b 85df522 2cefbad d0e03bd 85df522 2cefbad 85df522 d0e03bd be5446b d0e03bd be5446b d0e03bd be5446b d0e03bd cf3fdd8 d0e03bd cf3fdd8 4002df7 d0e03bd 4002df7 d0e03bd 2cefbad 4002df7 ed65bd6 4002df7 de41cc1 6e4901a de41cc1 6e4901a de41cc1 d0e03bd de41cc1 be5446b de41cc1 4670eb3 de41cc1 f81c51a 4002df7 de41cc1 d2176ba 4002df7 be5446b 4002df7 16c8236 4002df7 36abdd4 4002df7 d0e03bd 4002df7 de41cc1 4002df7 d0e03bd 4002df7 be5446b 4002df7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 |
import spaces
import gradio as gr
import torch
from PIL import Image
from transformers import AutoProcessor
from longcat_image.models import LongCatImageTransformer2DModel
from longcat_image.pipelines import LongCatImageEditPipeline, LongCatImagePipeline
import numpy as np
import random
import os
import requests
import tempfile
import shutil
from urllib.parse import urlparse
# Upper bound used when a random seed is drawn (largest signed 32-bit integer).
MAX_SEED = np.iinfo(np.int32).max
# Not referenced in the visible part of this file.
MAX_IMAGE_SIZE = 2048

# --- Model Loading ---
# Both pipelines are built once, at import time, and shared by every request.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Text-to-Image Model
t2i_model_id = 'meituan-longcat/LongCat-Image'
print(f"🔄 Loading Text-to-Image model from {t2i_model_id}...")

t2i_text_processor = AutoProcessor.from_pretrained(
    t2i_model_id,
    subfolder='tokenizer'
)

# The transformer is loaded separately in bfloat16 and handed to the pipeline.
t2i_transformer = LongCatImageTransformer2DModel.from_pretrained(
    t2i_model_id,
    subfolder='transformer',
    torch_dtype=torch.bfloat16,
    use_safetensors=True
).to(device)

pipe = LongCatImagePipeline.from_pretrained(
    t2i_model_id,
    transformer=t2i_transformer,
    text_processor=t2i_text_processor,
)
# Move/cast the remaining pipeline components to the same device and dtype.
pipe.to(device, torch.bfloat16)
print("✅ Text-to-Image model loaded successfully")

# Image Edit Model
edit_model_id = 'meituan-longcat/LongCat-Image-Edit'
print(f"🔄 Loading Image Edit model from {edit_model_id}...")

edit_text_processor = AutoProcessor.from_pretrained(
    edit_model_id,
    subfolder='tokenizer'
)

edit_transformer = LongCatImageTransformer2DModel.from_pretrained(
    edit_model_id,
    subfolder='transformer',
    torch_dtype=torch.bfloat16,
    use_safetensors=True
).to(device)

edit_pipe = LongCatImageEditPipeline.from_pretrained(
    edit_model_id,
    transformer=edit_transformer,
    text_processor=edit_text_processor,
)
edit_pipe.to(device, torch.bfloat16)
print(f"✅ Image Edit model loaded successfully on {device}")
def load_lora_auto(pipe, lora_input):
    """Load LoRA weights into ``pipe`` from a repo ID, a path, or a URL.

    Accepted forms of ``lora_input``:

    * ``"author/model"`` or any other non-URL string containing ``/``:
      passed straight to ``pipe.load_lora_weights``.
    * A ``huggingface.co`` URL without ``/blob/`` or ``/resolve/``: treated
      as a repo page; the URL path becomes the repo ID.
    * Any other ``http`` URL: downloaded to a temporary directory
      (``/blob/`` links are rewritten to ``/resolve/`` first), loaded, and
      the temporary directory is removed afterwards.

    Empty, whitespace-only or ``None`` input, and non-URL strings without a
    ``/``, are ignored.

    Args:
        pipe: Object exposing ``load_lora_weights(path_or_repo_id)``.
        lora_input: User-supplied LoRA reference (may be ``None``).

    Raises:
        requests.RequestException: If the download fails, times out, or the
            server answers with an error status.
    """
    lora_input = (lora_input or "").strip()
    if not lora_input:
        return

    if not lora_input.startswith("http"):
        # If it's just an ID like "author/model" (or a filesystem path).
        if "/" in lora_input:
            pipe.load_lora_weights(lora_input)
        return

    url = lora_input

    # Repo page (no blob/resolve): the URL path is the repo ID.
    if "huggingface.co" in url and "/blob/" not in url and "/resolve/" not in url:
        pipe.load_lora_weights(urlparse(url).path.strip("/"))
        return

    # Blob link -> convert to resolve link so the raw file is served.
    url = url.replace("/blob/", "/resolve/")

    # Download direct file.
    tmp_dir = tempfile.mkdtemp()
    try:
        local_path = os.path.join(tmp_dir, os.path.basename(urlparse(url).path))
        print(f"Downloading LoRA from {url}...")
        # The timeout keeps a stalled server from blocking the worker
        # indefinitely; the ``with`` releases the streamed connection.
        with requests.get(url, stream=True, timeout=30) as resp:
            resp.raise_for_status()
            with open(local_path, "wb") as f:
                for chunk in resp.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
        print(f"Saved LoRA to {local_path}")
        pipe.load_lora_weights(local_path)
    finally:
        shutil.rmtree(tmp_dir, ignore_errors=True)
@spaces.GPU(duration=120)
def infer(prompt, seed=42, randomize_seed=False, width=1024, height=1024, guidance_scale=4, num_inference_steps=28, lora_id=None, lora_scale=0.95, progress=gr.Progress(track_tqdm=True)):
    """Generate one image from ``prompt`` with the text-to-image pipeline.

    Args:
        prompt: Text prompt passed to the pipeline.
        seed: Seed for the random generator; replaced when ``randomize_seed``
            is true.
        randomize_seed: If true, draw a fresh seed in ``[0, MAX_SEED]``.
        width: Output width passed to the pipeline.
        height: Output height passed to the pipeline.
        guidance_scale: Guidance scale passed to the pipeline.
        num_inference_steps: Number of inference steps passed to the pipeline.
        lora_id: Optional LoRA reference (repo ID, path or URL). Blank or
            ``None`` means no LoRA.
        lora_scale: Accepted for interface compatibility but not applied
            anywhere in this function.
            NOTE(review): wire this into the pipeline call once the
            supported way of scaling a LoRA for this pipeline is confirmed.
        progress: Gradio progress tracker; not referenced in the body.

    Returns:
        A ``(image, seed)`` tuple: the first generated image and the seed
        that was actually used.
    """
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    generator = torch.Generator().manual_seed(seed)

    # Normalise once so the load and unload decisions can never disagree.
    lora_ref = lora_id.strip() if lora_id else ""

    try:
        # Loading happens inside the ``try`` so a failed or partial load is
        # still cleaned up by the ``finally`` below; the pipeline is shared
        # between requests and must not keep stale adapters.
        if lora_ref:
            pipe.unload_lora_weights()
            load_lora_auto(pipe, lora_ref)

        image = pipe(
            prompt=prompt,
            negative_prompt="",
            width=width,
            height=height,
            num_inference_steps=num_inference_steps,
            generator=generator,
            guidance_scale=guidance_scale
        ).images[0]
        print("Image Generation Completed for: ", prompt, lora_id)
        return image, seed
    finally:
        # Unload LoRA weights if they were (possibly partially) loaded.
        if lora_ref:
            pipe.unload_lora_weights()
@spaces.GPU(duration=120)
def edit_image(
    input_image: Image.Image,
    prompt: str,
    seed: int,
    progress=gr.Progress()
):
    """Edit ``input_image`` according to the text instruction in ``prompt``.

    Args:
        input_image: Image to edit; converted to RGB if it is in another mode.
        prompt: Edit instruction; must contain non-whitespace text.
        seed: Seed for the random generator used by the edit pipeline.
        progress: Gradio progress tracker, updated at each stage.

    Returns:
        The first image produced by the edit pipeline.

    Raises:
        gr.Error: If no image or no instruction was supplied, or if the
            pipeline fails (the original exception is chained as the cause).
    """
    if input_image is None:
        raise gr.Error("Please upload an image first")
    if not prompt or prompt.strip() == "":
        raise gr.Error("Please enter an edit instruction")

    try:
        progress(0.1, desc="Preparing image...")
        if input_image.mode != 'RGB':
            input_image = input_image.convert('RGB')

        progress(0.2, desc="Generating edited image...")
        generator = torch.Generator("cuda" if torch.cuda.is_available() else "cpu").manual_seed(seed)
        with torch.inference_mode():
            output = edit_pipe(
                input_image,
                prompt,
                negative_prompt="",
                guidance_scale=4.5,
                num_inference_steps=50,
                num_images_per_prompt=1,
                generator=generator
            )
    except Exception as e:
        # Surface the failure in the UI while keeping the original traceback
        # attached as the cause for server-side debugging.
        raise gr.Error(f"Error during image editing: {e}") from e

    progress(1.0, desc="Done!")
    return output.images[0]
# Sample prompts shown in the UI's Examples panel; each one fills the prompt box.
examples = [
"a tiny astronaut hatching from an egg on the moon",
"a cat holding a sign that says hello world",
"an anime illustration of a wiener schnitzel",
]
# Custom stylesheet passed to gr.Blocks: centres the main column (capped at
# 960px wide) and styles elements carrying the "generate-btn" class.
css = """
#col-container {
margin: 0 auto;
max-width: 960px;
}
.generate-btn {
background: linear-gradient(90deg, #4B79A1 0%, #283E51 100%) !important;
border: none !important;
color: white !important;
}
.generate-btn:hover {
transform: translateY(-2px);
box-shadow: 0 5px 15px rgba(0,0,0,0.2);
}
"""
# --- UI ---
# Two-column layout: prompt, advanced settings and the generate button on the
# left, the generated image on the right, example prompts underneath.
with gr.Blocks(css=css) as app:
    gr.HTML("<center><h1>LongCat-Image 6B</h1></center>")
    with gr.Column(elem_id="col-container"):
        with gr.Row():
            with gr.Column():
                with gr.Row():
                    text_prompt = gr.Textbox(label="Prompt", placeholder="Enter a prompt here", lines=3, elem_id="prompt-text-input")
                with gr.Row():
                    with gr.Accordion("Advanced Settings", open=False):
                        with gr.Row():
                            custom_lora = gr.Textbox(label="Custom LoRA (optional)", info="URL or the path to the LoRA weights", placeholder="kudzueye/boreal-qwen-image")
                            lora_scale = gr.Slider(
                                label="LoRA Scale",
                                minimum=0,
                                maximum=2,
                                step=0.01,
                                value=1,
                            )
                        with gr.Row():
                            width = gr.Slider(label="Width", value=1024, minimum=64, maximum=2048, step=16)
                            height = gr.Slider(label="Height", value=1024, minimum=64, maximum=2048, step=16)
                            seed = gr.Slider(label="Seed", value=-1, minimum=-1, maximum=4294967296, step=1)
                            randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
                        with gr.Row():
                            steps = gr.Slider(label="Inference steps", value=28, minimum=1, maximum=100, step=1)
                            cfg = gr.Slider(label="Guidance Scale", value=4.5, minimum=1, maximum=20, step=0.5)
                with gr.Row():
                    text_button = gr.Button("✨ Generate Image", variant='primary', elem_classes=["generate-btn"])
            with gr.Column():
                with gr.Row():
                    image_output = gr.Image(type="pil", label="Image Output", elem_id="gallery")
        with gr.Column():
            gr.Examples(
                examples=examples,
                inputs=[text_prompt],
            )

    # Clicking the button or submitting the prompt box both run `infer`.
    # The seed that was actually used is written back to the seed slider.
    gr.on(
        triggers=[text_button.click, text_prompt.submit],
        fn=infer,
        inputs=[text_prompt, seed, randomize_seed, width, height, cfg, steps, custom_lora, lora_scale],
        outputs=[image_output, seed]
    )

app.launch(share=True, mcp_server=True)