"""Gradio demo app for the NextStep-1-Large image generation pipeline.

Loads the tokenizer/model from the Hugging Face Hub at import time, wraps them
in a ``NextStepPipeline`` and exposes a small Blocks UI (prompt, positive /
negative prompt, seed, sampling steps, cfg sliders) that calls ``infer``.
"""

import random

import gradio as gr
import numpy as np
import spaces  # imported ahead of torch, as in the original import order
import torch
from PIL import Image
from transformers import AutoTokenizer, AutoModel

from models.gen_pipeline import NextStepPipeline
from utils.aspect_ratio import center_crop_arr_with_buckets

HF_HUB = "stepfun-ai/NextStep-1-Large"

device = "cuda" if torch.cuda.is_available() else "cpu"

tokenizer = AutoTokenizer.from_pretrained(HF_HUB, local_files_only=False, trust_remote_code=True)
model = AutoModel.from_pretrained(HF_HUB, local_files_only=False, trust_remote_code=True)
pipeline = NextStepPipeline(tokenizer=tokenizer, model=model).to(device=device)

MAX_SEED = np.iinfo(np.int16).max
MAX_IMAGE_SIZE = 512

DEFAULT_POSITIVE_PROMPT = None
DEFAULT_NEGATIVE_PROMPT = "copy the original image"


@spaces.GPU(duration=300)
def infer(
    prompt=None,
    ref=None,
    seed=0,
    text_cfg=7.5,
    img_cfg=1.0,
    num_inference_steps=30,
    positive_prompt=DEFAULT_POSITIVE_PROMPT,
    negative_prompt=DEFAULT_NEGATIVE_PROMPT,
    progress=gr.Progress(track_tqdm=True),
):
    """Generate one image for ``prompt`` and return it.

    Args:
        prompt: Caption text. ``None`` or ``""`` triggers a UI warning and
            the function returns ``None`` without calling the pipeline.
        ref: Optional reference image. When given it is center-cropped with
            ``center_crop_arr_with_buckets(..., buckets=[512])`` and its size
            determines the output height/width. When ``None`` the output is
            ``MAX_IMAGE_SIZE`` x ``MAX_IMAGE_SIZE`` and ``img_cfg`` is forced
            to ``1.0``.
        seed: Seed forwarded to the pipeline.
        text_cfg: Forwarded to the pipeline as ``cfg``.
        img_cfg: Forwarded to the pipeline as ``cfg_img``.
        num_inference_steps: Forwarded as ``num_sampling_steps``.
        positive_prompt: Forwarded unchanged to the pipeline.
        negative_prompt: Forwarded unchanged to the pipeline.
        progress: Gradio progress tracker (tqdm tracking enabled).

    Returns:
        The first element of the pipeline's result, or ``None`` when no
        prompt was supplied.
    """
    if prompt in (None, ""):
        gr.Warning("⚠️ Please enter a prompt!")
        return None

    editing_caption = prompt

    if ref is not None:
        # NOTE(review): the cropped reference is only used for its size here;
        # it is never handed to the pipeline. If reference-conditioned editing
        # is intended, the pipeline call presumably needs the image as well
        # (and possibly an image placeholder in the caption) -- confirm
        # against NextStepPipeline.generate_image.
        input_image = center_crop_arr_with_buckets(ref, buckets=[512])
        # `.size` is indexed as (width, height); the pipeline takes (h, w).
        hw = (input_image.size[1], input_image.size[0])
    else:
        # No reference image: image guidance is neutralised and a square
        # default resolution is used. Previously this path dereferenced
        # `None.size` and crashed.
        img_cfg = 1.0
        hw = (MAX_IMAGE_SIZE, MAX_IMAGE_SIZE)

    image = pipeline.generate_image(
        captions=editing_caption,
        num_images_per_caption=1,
        positive_prompt=positive_prompt,
        negative_prompt=negative_prompt,
        hw=hw,
        cfg=text_cfg,
        cfg_img=img_cfg,
        cfg_schedule="constant",
        use_norm=True,
        num_sampling_steps=num_inference_steps,
        seed=seed,
        progress=True,
    )

    return image[0]


css = """
#col-container {
    margin: 0 auto;
    max-width: 800px;
}
"""

with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown(" # NextStep-1-Large-Edit")

        with gr.Row():
            prompt = gr.Text(
                label="Prompt",
                show_label=False,
                max_lines=1,
                placeholder="Enter your prompt",
                container=False,
            )
            run_button = gr.Button("Run", scale=0, variant="primary")

        # The reference-image upload is not exposed in this UI. A constant
        # None is fed into infer's `ref` parameter instead, so the remaining
        # inputs stay aligned with infer's positional signature.
        ref = gr.State(value=None)

        with gr.Row():
            with gr.Accordion("Advanced Settings", open=True):
                positive_prompt = gr.Text(
                    label="Positive Prompt",
                    show_label=False,
                    max_lines=1,
                    placeholder="Enter your positive prompt",
                    container=False,
                )
                negative_prompt = gr.Text(
                    label="Negative Prompt",
                    show_label=False,
                    max_lines=2,
                    placeholder="Enter your negative prompt",
                    container=False,
                )

                with gr.Row():
                    seed = gr.Slider(
                        label="Seed",
                        minimum=0,
                        maximum=MAX_SEED,
                        step=1,
                        value=42,
                    )
                    num_inference_steps = gr.Slider(
                        label="# sampling steps",
                        minimum=10,
                        maximum=50,
                        step=1,
                        value=30,  # Replace with defaults that work for your model
                    )

                with gr.Row():
                    text_cfg = gr.Slider(
                        label="Text cfg",
                        minimum=1.0,
                        maximum=15.0,
                        step=0.1,
                        value=7.5,  # Replace with defaults that work for your model
                    )
                    img_cfg = gr.Slider(
                        label="Image cfg",
                        minimum=1.0,
                        maximum=15.0,
                        step=0.1,
                        value=2.0,  # Replace with defaults that work for your model
                    )

        with gr.Row():
            # Hidden until the first run is triggered (see show_result).
            result_1 = gr.Image(label="Result 1", show_label=False, container=True, height=400, visible=False)

        def show_result():
            """Reveal the (initially hidden) result image component."""
            # Exactly one update, matching the single component in `outputs`.
            return gr.update(visible=True)

        gr.on(
            triggers=[run_button.click, prompt.submit],
            fn=infer,
            # Order must match infer's positional parameters.
            inputs=[
                prompt,
                ref,
                seed,
                text_cfg,
                img_cfg,
                num_inference_steps,
                positive_prompt,
                negative_prompt,
            ],
            outputs=[result_1],
        )

        gr.on(
            triggers=[run_button.click, prompt.submit],
            fn=show_result,
            outputs=[result_1],
        )

if __name__ == "__main__":
    demo.launch()