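# NOTE: the six lines below are file-viewer residue from the hosting page
# (author, commit, size); they are kept as comments so the module parses.
# Alexander Bagus
# 22
# bf1b8d0
# raw
# history blame
# 7.68 kB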
import gradio as gr
import numpy as np
import torch, random, json, spaces, time
from diffsynth.pipelines.qwen_image import (
QwenImagePipeline, ModelConfig,
QwenImageUnit_Image2LoRAEncode, QwenImageUnit_Image2LoRADecode
)
from safetensors.torch import save_file
import torch
from PIL import Image
# from utils import repo_utils, image_utils, prompt_utils
# repo_utils.clone_repo_if_not_exists("https://github.com/apple/ml-starflow.git", "app/models")
# repo_utils.clone_repo_if_not_exists("https://huggingface.co/apple/starflow", "app/models")
# Default tensor precision (bfloat16).
DTYPE = torch.bfloat16

# Largest signed 32-bit integer; used as the upper bound of the seed slider.
MAX_SEED = np.iinfo(np.int32).max

# Keyword arguments splatted into each ModelConfig below: the offload and
# onload slots point at "disk", while preparation and computation use
# bfloat16 on the CUDA device.
vram_config_disk_offload = dict(
    offload_dtype="disk",
    offload_device="disk",
    onload_dtype="disk",
    onload_device="disk",
    preparing_dtype=torch.bfloat16,
    preparing_device="cuda",
    computation_dtype=torch.bfloat16,
    computation_device="cuda",
)
# Build the shared image-to-LoRA pipeline once at import time.
# Three checkpoints are loaded (two general image encoders plus the
# Qwen-Image-i2L style model), all with the disk-offload VRAM settings.
pipe = QwenImagePipeline.from_pretrained(
    torch_dtype=torch.bfloat16,
    device="cuda",
    model_configs=[
        ModelConfig(model_id=repo, origin_file_pattern=pattern, **vram_config_disk_offload)
        for repo, pattern in (
            ("DiffSynth-Studio/General-Image-Encoders", "SigLIP2-G384/model.safetensors"),
            ("DiffSynth-Studio/General-Image-Encoders", "DINOv3-7B/model.safetensors"),
            ("DiffSynth-Studio/Qwen-Image-i2L", "Qwen-Image-i2L-Style.safetensors"),
        )
    ],
    processor_config=ModelConfig(model_id="Qwen/Qwen-Image-Edit", origin_file_pattern="processor/"),
    # Second element of mem_get_info() converted from bytes to GiB, minus
    # 0.5 GiB of headroom.
    vram_limit=torch.cuda.mem_get_info("cuda")[1] / 1024 ** 3 - 0.5,
)
# pipe = ZImageControlPipeline(
# vae=vae,
# tokenizer=tokenizer,
# text_encoder=text_encoder,
# transformer=transformer,
# scheduler=scheduler,
# )
# pipe.to("cuda", DTYPE)
# def prepare(prompt, is_polish_prompt):
# if not is_polish_prompt: return prompt, False
# polished_prompt = prompt_utils.polish_prompt(prompt)
# return polished_prompt, True
@spaces.GPU
def inference(
    prompt,
    negative_prompt,
    seed=42,
    randomize_seed=True,
    guidance_scale=1.5,
    num_inference_steps=8,
    progress=gr.Progress(track_tqdm=True),
):
    """Encode the bundled style images into a LoRA and save it to disk.

    The five reference images under ``examples/style/1/`` are passed through
    the image-to-LoRA encode and decode units of the module-level ``pipe``,
    and the resulting weights are written to ``model_style.safetensors`` in
    the current working directory.

    Args:
        prompt: Text prompt from the UI. Currently unused.
        negative_prompt: Negative prompt from the UI. Currently unused.
        seed: Seed value from the UI; returned unchanged.
        randomize_seed: Seed randomization flag. Currently unused.
        guidance_scale: Guidance scale from the UI. Currently unused.
        num_inference_steps: Step count from the UI. Currently unused.
        progress: Gradio progress tracker (tqdm tracking enabled).

    Returns:
        An ``(image, seed)`` pair, one value per component listed in the
        click handler's ``outputs``. ``image`` is ``None`` because this
        function does not generate an image; ``seed`` is echoed back so the
        slider keeps its value.
    """
    timestamp = time.time()
    print(f"timestamp: {timestamp}")

    # Load images
    images = [Image.open(f"examples/style/1/{index}.jpg") for index in range(5)]

    # Model inference (no gradients are needed for encoding/decoding).
    with torch.no_grad():
        embs = QwenImageUnit_Image2LoRAEncode().process(pipe, image2lora_images=images)
        lora = QwenImageUnit_Image2LoRADecode().process(pipe, **embs)["lora"]
    save_file(lora, "model_style.safetensors")

    # The click handler declares two output components; returning a single
    # value made Gradio fail with "didn't return enough output values".
    return None, seed
# # process image
# print("DEBUG: process image")
# if input_image is None:
# print("Error: input_image is empty.")
# return None
# print("DEBUG: control_image_torch")
# orig_width, orig_height = input_image.size
# control_image, width, height = image_utils.rescale_image(input_image, image_scale, 16, 2048)
# control_image_torch = image_utils.get_image_latent(control_image, sample_size=[height, width])[:, :, 0]
# # generation
# if randomize_seed: seed = random.randint(0, MAX_SEED)
# generator = torch.Generator().manual_seed(seed)
# output_image = pipe(
# prompt=prompt,
# negative_prompt = negative_prompt,
# width=width,
# height=height,
# generator=generator,
# guidance_scale=guidance_scale,
# control_image=control_image_torch,
# num_inference_steps=num_inference_steps,
# control_context_scale=control_context_scale,
# ).images[0]
# output_image = output_image.resize((orig_width * image_scale, orig_height * image_scale))
# return output_image, seed
def read_file(path: str) -> str:
    """Return the entire text content of the file at *path*, read as UTF-8."""
    with open(path, encoding='utf-8') as handle:
        return handle.read()
# Page-level CSS: centers the "#col-container" element and caps its width.
css = """
#col-container {
margin: 0 auto;
max-width: 960px;
}
"""

# Example entries for the UI. Read explicitly as UTF-8 (matching read_file)
# so the result does not depend on the platform's default encoding.
# `examples` is only referenced by a commented-out gr.Examples call below.
with open('examples/0_examples.json', 'r', encoding='utf-8') as file:
    examples = json.load(file)
# Build the Gradio UI: a header, a two-column row (inputs on the left, result
# image on the right), a footer, and the click wiring for the Generate button.
# NOTE(review): the nesting below was reconstructed from a source whose
# indentation was lost — confirm it against the intended layout.
with gr.Blocks() as demo:
    with gr.Column(elem_id="col-container"):
        with gr.Column():
            gr.HTML(read_file("static/header.html"))
        with gr.Row():
            # Left column: prompt, Generate button and advanced settings.
            with gr.Column():
                prompt = gr.Textbox(
                    label="Prompt",
                    show_label=False,
                    lines=2,
                    placeholder="Enter your prompt",
                    value="a man in a fishing boat. high quality, detailed"
                    # container=False,
                )
                # is_polish_prompt = gr.Checkbox(label="Polish prompt", value=True)
                # control_mode = gr.Radio(
                # choices=["Canny", "Depth", "HED", "MLSD", "Pose"],
                # value="Canny",
                # label="Control Mode"
                # )
                run_button = gr.Button("Generate", variant="primary")
                with gr.Accordion("Advanced Settings", open=False):
                    negative_prompt = gr.Textbox(
                        label="Negative prompt",
                        lines=2,
                        container=False,
                        placeholder="Enter your negative prompt",
                        value="blurry, ugly, bad"
                    )
                    with gr.Row():
                        num_inference_steps = gr.Slider(
                            label="Steps",
                            minimum=1,
                            maximum=30,
                            step=1,
                            value=9,
                        )
                        # NOTE(review): this slider is not listed in the click
                        # handler's `inputs`, so its value is never sent to
                        # `inference`.
                        control_context_scale = gr.Slider(
                            label="Context scale",
                            minimum=0.0,
                            maximum=1.0,
                            step=0.01,
                            value=0.75,
                        )
                    with gr.Row():
                        guidance_scale = gr.Slider(
                            label="Guidance scale",
                            minimum=0.0,
                            maximum=10.0,
                            step=0.1,
                            value=1.0,
                        )
                        seed = gr.Slider(
                            label="Seed",
                            minimum=0,
                            maximum=MAX_SEED,
                            step=1,
                            value=42,
                        )
                    randomize_seed = gr.Checkbox(label="Randomize seed", value=False)
            # Right column: the result image.
            with gr.Column():
                output_image = gr.Image(label="Generated image", show_label=False)
                # polished_prompt = gr.Textbox(label="Polished prompt", interactive=False)
                # with gr.Accordion("Preprocessor output", open=False):
                # control_image = gr.Image(label="Control image", show_label=False)
        # gr.Examples(examples=examples, inputs=[input_image])
        gr.Markdown(read_file("static/footer.md"))
    # Component values are passed to `inference` positionally in the order
    # listed in `inputs`; the handler must return one value per component
    # listed in `outputs`.
    run_button.click(
        fn=inference,
        inputs=[
            prompt,
            negative_prompt,
            seed,
            randomize_seed,
            guidance_scale,
            num_inference_steps,
        ],
        outputs=[output_image, seed],
    )
# Launch the app only when this file is executed as a script.
# NOTE(review): `css` is passed to launch() rather than to gr.Blocks() —
# confirm the installed Gradio version accepts it there.
if __name__ == "__main__":
    demo.launch(mcp_server=True, css=css)