"""Gradio front end for the LongCat-Image demo.

Builds the UI, optionally polishes the prompt, and then hands the request to
``inference``. The image-generation body of ``inference`` is currently
disabled (kept below as comments), so the callback only returns placeholders.
"""

import json
import os
import random
import subprocess
import time

import gradio as gr
import numpy as np
# NOTE(review): `spaces` is deliberately kept ahead of `torch`, as in the
# original import order -- confirm against the ZeroGPU requirements.
import spaces
import torch
from transformers import AutoProcessor

from longcat_image.models import LongCatImageTransformer2DModel
from longcat_image.pipelines import LongCatImagePipeline
from utils.image_utils import rescale_image
from utils.prompt_utils import polish_prompt

# GIT_DIR = "LongCat-Image"
# GIT_URL = "https://github.com/yourusername/LongCat-Image.git"
# if not os.path.isdir(GIT_DIR):
#     subprocess.run(["git", "clone", GIT_URL])
# else:
#     print("Folder already exists.")

# Upper bound for the seed slider (and for random seeds once generation is
# re-enabled). Previously referenced but never defined, which raised a
# NameError while the UI was being built.
MAX_SEED = np.iinfo(np.int32).max


def prepare(prompt: str, is_polish_prompt: bool):
    """Optionally polish the prompt before it is passed to ``inference``.

    Args:
        prompt: Text entered in the prompt box.
        is_polish_prompt: State of the "Polish prompt" checkbox.

    Returns:
        A ``(text, flag)`` pair written to the "Polished prompt" textbox and
        back to the checkbox: the untouched prompt and ``False`` when
        polishing is off, otherwise the result of ``polish_prompt`` and
        ``True``.
    """
    if not is_polish_prompt:
        return prompt, False

    polished_prompt = polish_prompt(prompt)
    return polished_prompt, True


@spaces.GPU
def inference(
    prompt,
    negative_prompt,
    input_image,
    image_scale=1.0,
    control_mode='Canny',
    control_context_scale=0.75,
    seed=42,
    randomize_seed=True,
    guidance_scale=1.5,
    num_inference_steps=8,
    progress=gr.Progress(track_tqdm=True),
):
    """Generate an image from a prompt and a control image (currently a stub).

    The generation pipeline is disabled; the call returns placeholders shaped
    like the three UI outputs it is wired to.

    Args:
        prompt: Prompt text (the polished prompt when polishing is enabled).
        negative_prompt: Negative prompt text.
        input_image: Uploaded image used as the control source.
        image_scale: Scale factor from the "Image scale" slider.
        control_mode: One of the "Control Mode" radio choices.
        control_context_scale: Value of the "Context scale" slider.
        seed: Seed from the "Seed" slider.
        randomize_seed: State of the "Randomize seed" checkbox.
        guidance_scale: Value of the "Guidance scale" slider.
        num_inference_steps: Value of the "Steps" slider.
        progress: Gradio progress tracker (tracks tqdm bars).

    Returns:
        ``(image, seed, control_image)``. While the pipeline is disabled this
        is ``(None, seed, None)``: both image outputs are left empty and the
        seed slider keeps its value.
    """
    # TODO: re-enable the pipeline below. `Processor`, `get_image_latent` and
    # `pipe` are not imported/defined in this file as shown and must be
    # provided before this code can run.
    #
    # timestamp = time.time()
    # print(f"timestamp: {timestamp}")
    #
    # # process image
    # print("DEBUG: process image")
    # if input_image is None:
    #     print("Error: input_image is empty.")
    #     return None
    #
    # # input_image, width, height = scale_image(input_image, image_scale)
    # # control_mode='HED'
    # processor_id = 'canny'
    # if control_mode == 'HED':
    #     processor_id = 'softedge_hed'
    # if control_mode =='Depth':
    #     processor_id = 'depth_midas'
    # if control_mode =='MLSD':
    #     processor_id = 'mlsd'
    # if control_mode =='Pose':
    #     processor_id = 'openpose_full'
    #
    # print(f"DEBUG: processor_id={processor_id}")
    # processor = Processor(processor_id)
    #
    # # Width must be divisible by 16
    # control_image, width, height = rescale_image(input_image, image_scale, 16)
    # control_image = control_image.resize((1024, 1024))
    #
    # print("DEBUG: processor running")
    # control_image = processor(control_image, to_pil=True)
    # control_image = control_image.resize((width, height))
    #
    # print("DEBUG: control_image_torch")
    # control_image_torch = get_image_latent(control_image, sample_size=[height, width])[:, :, 0]
    #
    # # generation
    # if randomize_seed: seed = random.randint(0, MAX_SEED)
    # generator = torch.Generator().manual_seed(seed)
    #
    # image = pipe(
    #     prompt=prompt,
    #     negative_prompt = negative_prompt,
    #     height=height,
    #     width=width,
    #     generator=generator,
    #     guidance_scale=guidance_scale,
    #     control_image=control_image_torch,
    #     num_inference_steps=num_inference_steps,
    #     control_context_scale=control_context_scale,
    # ).images[0]
    #
    # return image, seed, control_image

    # The click chain maps this return value onto three components
    # (output_image, seed, control_image), so a 3-tuple is required; the
    # previous bare `True` did not match that arity.
    return None, seed, None


def read_file(path: str) -> str:
    """Return the full contents of the UTF-8 text file at ``path``."""
    with open(path, 'r', encoding='utf-8') as f:
        content = f.read()
    return content


css = """
#col-container {
    margin: 0 auto;
    max-width: 960px;
}
"""

# Example rows for gr.Examples; read as UTF-8 like the other static assets.
with open('static/data.json', 'r', encoding='utf-8') as file:
    data = json.load(file)
examples = data['examples']

with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        with gr.Column():
            gr.HTML(read_file("static/header.html"))

        with gr.Row():
            # Left column: inputs and settings.
            with gr.Column():
                input_image = gr.Image(
                    height=290,
                    sources=['upload', 'clipboard'],
                    image_mode='RGB',
                    # elem_id="image_upload",
                    type="pil",
                    label="Upload",
                )

                prompt = gr.Textbox(
                    label="Prompt",
                    show_label=False,
                    lines=2,
                    placeholder="Enter your prompt",
                    # container=False,
                )
                is_polish_prompt = gr.Checkbox(label="Polish prompt", value=True)
                control_mode = gr.Radio(
                    choices=["Canny", "Depth", "HED", "MLSD", "Pose"],
                    value="Canny",
                    label="Control Mode",
                )

                run_button = gr.Button("Generate", variant="primary")

                with gr.Accordion("Advanced Settings", open=False):
                    negative_prompt = gr.Textbox(
                        label="Negative prompt",
                        lines=2,
                        container=False,
                        placeholder="Enter your negative prompt",
                        value="blurry ugly bad",
                    )
                    with gr.Row():
                        num_inference_steps = gr.Slider(
                            label="Steps",
                            minimum=1,
                            maximum=30,
                            step=1,
                            value=9,
                        )
                        control_context_scale = gr.Slider(
                            label="Context scale",
                            minimum=0.0,
                            maximum=1.0,
                            step=0.01,
                            value=0.75,
                        )

                    with gr.Row():
                        guidance_scale = gr.Slider(
                            label="Guidance scale",
                            minimum=0.0,
                            maximum=10.0,
                            step=0.1,
                            value=1.0,
                        )
                        image_scale = gr.Slider(
                            label="Image scale",
                            minimum=0.5,
                            maximum=2.0,
                            step=0.1,
                            value=1.0,
                        )

                    seed = gr.Slider(
                        label="Seed",
                        minimum=0,
                        maximum=MAX_SEED,
                        step=1,
                        value=42,
                    )
                    randomize_seed = gr.Checkbox(label="Randomize seed", value=False)

            # Right column: results.
            with gr.Column():
                output_image = gr.Image(label="Generated image", show_label=False)
                polished_prompt = gr.Textbox(label="Polished prompt", interactive=False)

                with gr.Accordion("Preprocessor output", open=False):
                    control_image = gr.Image(label="Control image", show_label=False)

        gr.Examples(examples=examples, inputs=[input_image, prompt, control_mode])
        gr.Markdown(read_file("static/footer.md"))

    # Two-step chain: polish the prompt first, then run inference on the
    # text that ended up in the "Polished prompt" box.
    run_button.click(
        fn=prepare,
        inputs=[prompt, is_polish_prompt],
        outputs=[polished_prompt, is_polish_prompt]
        # outputs=gr.State(),  # Pass to the next function, not to UI at this step
    ).then(
        fn=inference,
        inputs=[
            polished_prompt,
            negative_prompt,
            input_image,
            image_scale,
            control_mode,
            control_context_scale,
            seed,
            randomize_seed,
            guidance_scale,
            num_inference_steps,
        ],
        outputs=[output_image, seed, control_image],
    )

if __name__ == "__main__":
    demo.launch(mcp_server=True)