LongCat-Image

Running on Zero

LongCat-Image / app.py

Alexander Bagus

53a42bb 9 days ago

7.37 kB

	import gradio as gr
	import numpy as np
	import os, random, json, spaces, torch, time, subprocess

	import torch
	from transformers import AutoProcessor
	from longcat_image.models import LongCatImageTransformer2DModel
	from longcat_image.pipelines import LongCatImagePipeline

	from utils.image_utils import rescale_image
	from utils.prompt_utils import polish_prompt


	# GIT_DIR = "LongCat-Image"
	# GIT_URL = "https://github.com/yourusername/LongCat-Image.git"

	# if not os.path.isdir(GIT_DIR):
	# subprocess.run(["git", "clone", GIT_URL])
	# else:
	# print("Folder already exists.")


	def prepare(prompt, is_polish_prompt):
	    """Optionally polish the prompt before it is passed on to inference.

	    Returns a ``(prompt_text, flag)`` pair: the result of ``polish_prompt``
	    together with ``True`` when polishing is requested, otherwise the
	    prompt exactly as given together with ``False``.
	    """
	    if is_polish_prompt:
	        rewritten = polish_prompt(prompt)
	        return rewritten, True
	    return prompt, False

	@spaces.GPU
	def inference(
	    prompt,
	    negative_prompt,
	    input_image,
	    image_scale=1.0,
	    control_mode='Canny',
	    control_context_scale=0.75,
	    seed=42,
	    randomize_seed=True,
	    guidance_scale=1.5,
	    num_inference_steps=8,
	    progress=gr.Progress(track_tqdm=True),
	):
	    """Placeholder generation handler for the "Generate" button.

	    The actual control-image pipeline is still commented out below, so no
	    image is produced yet and most arguments are currently unused.

	    Returns:
	        A 3-tuple ``(image, seed, control_image)``. The event chain that
	        calls this function binds exactly three output components
	        (``output_image``, ``seed``, ``control_image``), so a single scalar
	        return value would not satisfy it. Until the pipeline is enabled,
	        both image slots are ``None`` and the incoming ``seed`` is echoed
	        back unchanged.
	    """
	    # NOTE: work-in-progress implementation kept for reference. It relies on
	    # names (Processor, get_image_latent, pipe, MAX_SEED) that are not
	    # defined in this file yet.
	    #
	    # timestamp = time.time()
	    # print(f"timestamp: {timestamp}")
	    #
	    # # process image
	    # print("DEBUG: process image")
	    # if input_image is None:
	    #     print("Error: input_image is empty.")
	    #     return None
	    #
	    # # input_image, width, height = scale_image(input_image, image_scale)
	    # # control_mode='HED'
	    # processor_id = 'canny'
	    # if control_mode == 'HED':
	    #     processor_id = 'softedge_hed'
	    # if control_mode =='Depth':
	    #     processor_id = 'depth_midas'
	    # if control_mode =='MLSD':
	    #     processor_id = 'mlsd'
	    # if control_mode =='Pose':
	    #     processor_id = 'openpose_full'
	    #
	    # print(f"DEBUG: processor_id={processor_id}")
	    # processor = Processor(processor_id)
	    #
	    # # Width must be divisible by 16
	    # control_image, width, height = rescale_image(input_image, image_scale, 16)
	    # control_image = control_image.resize((1024, 1024))
	    #
	    # print("DEBUG: processor running")
	    # control_image = processor(control_image, to_pil=True)
	    # control_image = control_image.resize((width, height))
	    #
	    # print("DEBUG: control_image_torch")
	    # control_image_torch = get_image_latent(control_image, sample_size=[height, width])[:, :, 0]
	    #
	    # # generation
	    # if randomize_seed: seed = random.randint(0, MAX_SEED)
	    # generator = torch.Generator().manual_seed(seed)
	    #
	    # image = pipe(
	    #     prompt=prompt,
	    #     negative_prompt = negative_prompt,
	    #     height=height,
	    #     width=width,
	    #     generator=generator,
	    #     guidance_scale=guidance_scale,
	    #     control_image=control_image_torch,
	    #     num_inference_steps=num_inference_steps,
	    #     control_context_scale=control_context_scale,
	    # ).images[0]
	    #
	    # return image, seed, control_image

	    # One value per bound output component: (output_image, seed, control_image).
	    return None, seed, None


	def read_file(path: str) -> str:
	    """Return the entire text of the file at ``path``, decoded as UTF-8."""
	    with open(path, encoding='utf-8') as handle:
	        return handle.read()


	# Page-level CSS passed to gr.Blocks: centres the main column and caps its width.
	css = """
	#col-container {
	margin: 0 auto;
	max-width: 960px;
	}
	"""

	# Example rows for the gr.Examples widget, loaded once at import time.
	# The encoding is given explicitly so decoding does not depend on the host
	# locale (read_file in this module reads its files as UTF-8 as well).
	with open('static/data.json', 'r', encoding='utf-8') as file:
	    data = json.load(file)
	examples = data['examples']

	# Upper bound for the seed slider. The slider below references MAX_SEED, but
	# no definition or import of that name is visible in this file, which would
	# raise NameError while the layout is being built.
	# NOTE(review): if MAX_SEED is meant to come from elsewhere, drop this line.
	MAX_SEED = int(np.iinfo(np.int32).max)

	# UI layout and event wiring.
	# NOTE(review): the nesting below is reconstructed from the statement order
	# and blank-line grouping; confirm it matches the intended layout.
	with gr.Blocks(css=css) as demo:
	    with gr.Column(elem_id="col-container"):
	        with gr.Column():
	            gr.HTML(read_file("static/header.html"))
	        with gr.Row():
	            # Left column: inputs and settings.
	            with gr.Column():
	                input_image = gr.Image(
	                    height=290, sources=['upload', 'clipboard'],
	                    image_mode='RGB',
	                    # elem_id="image_upload",
	                    type="pil", label="Upload")

	                prompt = gr.Textbox(
	                    label="Prompt",
	                    show_label=False,
	                    lines=2,
	                    placeholder="Enter your prompt",
	                    # container=False,
	                )
	                is_polish_prompt = gr.Checkbox(label="Polish prompt", value=True)
	                control_mode = gr.Radio(
	                    choices=["Canny", "Depth", "HED", "MLSD", "Pose"],
	                    value="Canny",
	                    label="Control Mode"
	                )
	                run_button = gr.Button("Generate", variant="primary")
	                with gr.Accordion("Advanced Settings", open=False):

	                    negative_prompt = gr.Textbox(
	                        label="Negative prompt",
	                        lines=2,
	                        container=False,
	                        placeholder="Enter your negative prompt",
	                        value="blurry ugly bad"
	                    )
	                    with gr.Row():
	                        num_inference_steps = gr.Slider(
	                            label="Steps",
	                            minimum=1,
	                            maximum=30,
	                            step=1,
	                            value=9,
	                        )
	                        control_context_scale = gr.Slider(
	                            label="Context scale",
	                            minimum=0.0,
	                            maximum=1.0,
	                            step=0.01,
	                            value=0.75,
	                        )

	                    with gr.Row():
	                        guidance_scale = gr.Slider(
	                            label="Guidance scale",
	                            minimum=0.0,
	                            maximum=10.0,
	                            step=0.1,
	                            value=1.0,
	                        )

	                        image_scale = gr.Slider(
	                            label="Image scale",
	                            minimum=0.5,
	                            maximum=2.0,
	                            step=0.1,
	                            value=1.0,
	                        )

	                    seed = gr.Slider(
	                        label="Seed",
	                        minimum=0,
	                        maximum=MAX_SEED,
	                        step=1,
	                        value=42,
	                    )
	                    randomize_seed = gr.Checkbox(label="Randomize seed", value=False)

	            # Right column: results.
	            with gr.Column():
	                output_image = gr.Image(label="Generated image", show_label=False)
	                polished_prompt = gr.Textbox(label="Polished prompt", interactive=False)

	                with gr.Accordion("Preprocessor output", open=False):
	                    control_image = gr.Image(label="Control image", show_label=False)

	        gr.Examples(examples=examples, inputs=[input_image, prompt, control_mode])
	        gr.Markdown(read_file("static/footer.md"))

	    # Two-step chain: prepare() fills the "Polished prompt" box first, then
	    # inference() runs with that box's value as its prompt.
	    run_button.click(
	        fn=prepare,
	        inputs=[prompt, is_polish_prompt],
	        outputs=[polished_prompt, is_polish_prompt]
	        # outputs=gr.State(), # Pass to the next function, not to UI at this step
	    ).then(
	        fn=inference,
	        inputs=[
	            polished_prompt,
	            negative_prompt,
	            input_image,
	            image_scale,
	            control_mode,
	            control_context_scale,
	            seed,
	            randomize_seed,
	            guidance_scale,
	            num_inference_steps,
	        ],
	        outputs=[output_image, seed, control_image],
	    )

	# Launch the app only when this file is executed as a script (not on import);
	# mcp_server=True is passed through to demo.launch unchanged.
	if __name__ == "__main__":
	    demo.launch(mcp_server=True)