Qwen-Image-to-LoRA

Running on Zero

Alexander Bagus

bf1b8d0 2 days ago

7.68 kB

	import gradio as gr
	import numpy as np
	import torch, random, json, spaces, time
	from diffsynth.pipelines.qwen_image import (
	QwenImagePipeline, ModelConfig,
	QwenImageUnit_Image2LoRAEncode, QwenImageUnit_Image2LoRADecode
	)
	from safetensors.torch import save_file
	import torch
	from PIL import Image
	# from utils import repo_utils, image_utils, prompt_utils


	# repo_utils.clone_repo_if_not_exists("https://github.com/apple/ml-starflow.git", "app/models")
	# repo_utils.clone_repo_if_not_exists("https://huggingface.co/apple/starflow", "app/models")

	# bfloat16 dtype constant (only referenced by commented-out code in this file).
	DTYPE = torch.bfloat16

	# Largest signed 32-bit integer; used as the upper bound of the seed slider.
	MAX_SEED = np.iinfo(np.int32).max

	# Keyword arguments splatted into every ModelConfig of the pipeline below.
	# Offload/onload are set to "disk", preparing/computation to bfloat16 on CUDA.
	# NOTE(review): presumably this keeps weights on disk until a module is
	# needed -- confirm the exact semantics against the DiffSynth documentation.
	vram_config_disk_offload = dict(
	    offload_dtype="disk",
	    offload_device="disk",
	    onload_dtype="disk",
	    onload_device="disk",
	    preparing_dtype=torch.bfloat16,
	    preparing_device="cuda",
	    computation_dtype=torch.bfloat16,
	    computation_device="cuda",
	)

	# Load models
	# Builds the module-level pipeline once at import time. Three checkpoints are
	# requested (SigLIP2 and DINOv3 image encoders plus the Qwen-Image-i2L style
	# model), each with the disk-offload VRAM settings defined above.
	pipe = QwenImagePipeline.from_pretrained(
	    torch_dtype=torch.bfloat16,
	    device="cuda",
	    model_configs=[
	        ModelConfig(
	            model_id="DiffSynth-Studio/General-Image-Encoders",
	            origin_file_pattern="SigLIP2-G384/model.safetensors",
	            **vram_config_disk_offload,
	        ),
	        ModelConfig(
	            model_id="DiffSynth-Studio/General-Image-Encoders",
	            origin_file_pattern="DINOv3-7B/model.safetensors",
	            **vram_config_disk_offload,
	        ),
	        ModelConfig(
	            model_id="DiffSynth-Studio/Qwen-Image-i2L",
	            origin_file_pattern="Qwen-Image-i2L-Style.safetensors",
	            **vram_config_disk_offload,
	        ),
	    ],
	    processor_config=ModelConfig(
	        model_id="Qwen/Qwen-Image-Edit",
	        origin_file_pattern="processor/",
	    ),
	    # Second element of mem_get_info is the device's total memory in bytes;
	    # convert to GiB and keep 0.5 GiB of headroom.
	    vram_limit=torch.cuda.mem_get_info("cuda")[1] / (1024 ** 3) - 0.5,
	)



	# pipe = ZImageControlPipeline(
	# vae=vae,
	# tokenizer=tokenizer,
	# text_encoder=text_encoder,
	# transformer=transformer,
	# scheduler=scheduler,
	# )
	# pipe.to("cuda", DTYPE)


	# def prepare(prompt, is_polish_prompt):
	# if not is_polish_prompt: return prompt, False
	# polished_prompt = prompt_utils.polish_prompt(prompt)
	# return polished_prompt, True


	@spaces.GPU
	def inference(
	    prompt,
	    negative_prompt,
	    seed=42,
	    randomize_seed=True,
	    guidance_scale=1.5,
	    num_inference_steps=8,
	    progress=gr.Progress(track_tqdm=True),
	):
	    """Encode the bundled style images into a LoRA and save it to disk.

	    The five images under ``examples/style/1/`` are passed through the
	    image-to-LoRA encode and decode units of the module-level ``pipe`` and
	    the resulting ``lora`` object is written to ``model_style.safetensors``
	    in the current working directory.

	    Args:
	        prompt: Currently unused by the body.
	        negative_prompt: Currently unused by the body.
	        seed: Returned unchanged so the seed slider keeps its value.
	        randomize_seed: Currently unused by the body.
	        guidance_scale: Currently unused by the body.
	        num_inference_steps: Currently unused by the body.
	        progress: Gradio progress tracker (created with ``track_tqdm=True``);
	            not referenced directly by the body.

	    Returns:
	        A ``(image, seed)`` tuple matching the two output components wired
	        to the "Generate" button. No image is produced, so ``image`` is
	        ``None``.
	    """
	    # Local import keeps this block self-contained (stdlib only).
	    from contextlib import ExitStack

	    timestamp = time.time()
	    print(f"timestamp: {timestamp}")

	    # Load images
	    image_paths = [f"examples/style/1/{index}.jpg" for index in range(5)]

	    # Image.open keeps the underlying file open; the ExitStack guarantees all
	    # five handles are closed once the encoder is done with them, even if
	    # inference raises.
	    with ExitStack() as stack:
	        images = [stack.enter_context(Image.open(path)) for path in image_paths]

	        # Model inference
	        with torch.no_grad():
	            embs = QwenImageUnit_Image2LoRAEncode().process(pipe, image2lora_images=images)
	            lora = QwenImageUnit_Image2LoRADecode().process(pipe, **embs)["lora"]

	    save_file(lora, "model_style.safetensors")

	    # The click handler lists two outputs (image, seed). Returning a single
	    # value makes Gradio reject the result, so return one value per output.
	    return None, seed

	# # process image
	# print("DEBUG: process image")
	# if input_image is None:
	# print("Error: input_image is empty.")
	# return None


	# print("DEBUG: control_image_torch")
	# orig_width, orig_height = input_image.size
	# control_image, width, height = image_utils.rescale_image(input_image, image_scale, 16, 2048)
	# control_image_torch = image_utils.get_image_latent(control_image, sample_size=[height, width])[:, :, 0]

	# # generation
	# if randomize_seed: seed = random.randint(0, MAX_SEED)
	# generator = torch.Generator().manual_seed(seed)

	# output_image = pipe(
	# prompt=prompt,
	# negative_prompt = negative_prompt,
	# width=width,
	# height=height,
	# generator=generator,
	# guidance_scale=guidance_scale,
	# control_image=control_image_torch,
	# num_inference_steps=num_inference_steps,
	# control_context_scale=control_context_scale,
	# ).images[0]

	# output_image = output_image.resize((orig_width * image_scale, orig_height * image_scale))
	# return output_image, seed


	def read_file(path: str) -> str:
	    """Return the entire contents of the file at *path*, decoded as UTF-8."""
	    with open(path, mode="r", encoding="utf-8") as handle:
	        return handle.read()


	# Page-level CSS handed to demo.launch(); it targets the element whose
	# elem_id is "col-container".
	css = """
	#col-container {
	margin: 0 auto;
	max-width: 960px;
	}
	"""

	# Example entries loaded at import time. Decode explicitly as UTF-8 (as
	# read_file does) so the result does not depend on the platform's default
	# locale encoding. `examples` is only referenced by a commented-out
	# gr.Examples call further down.
	with open('examples/0_examples.json', 'r', encoding='utf-8') as file:
	    examples = json.load(file)

	# Build the Gradio UI and bind it to `demo`: header HTML, a prompt box with a
	# "Generate" button, an "Advanced Settings" accordion, an image output, a
	# footer, and the click wiring for the button.
	# NOTE(review): the nesting indentation of the `with` blocks below is missing
	# in this copy of the file; it has to be restored before the module can run.
	with gr.Blocks() as demo:
	with gr.Column(elem_id="col-container"):
	with gr.Column():
	# Header markup is read from a static file at import time.
	gr.HTML(read_file("static/header.html"))
	with gr.Row():
	with gr.Column():

	# Prompt text box; first input of the click handler.
	prompt = gr.Textbox(
	label="Prompt",
	show_label=False,
	lines=2,
	placeholder="Enter your prompt",
	value="a man in a fishing boat. high quality, detailed"
	# container=False,
	)
	# is_polish_prompt = gr.Checkbox(label="Polish prompt", value=True)
	# control_mode = gr.Radio(
	# choices=["Canny", "Depth", "HED", "MLSD", "Pose"],
	# value="Canny",
	# label="Control Mode"
	# )
	# Button whose click event is bound to `inference` at the end of this block.
	run_button = gr.Button("Generate", variant="primary")
	# Accordion created with open=False, i.e. collapsed by default.
	with gr.Accordion("Advanced Settings", open=False):

	negative_prompt = gr.Textbox(
	label="Negative prompt",
	lines=2,
	container=False,
	placeholder="Enter your negative prompt",
	value="blurry, ugly, bad"
	)
	with gr.Row():
	num_inference_steps = gr.Slider(
	label="Steps",
	minimum=1,
	maximum=30,
	step=1,
	value=9,
	)
	# NOTE(review): this slider is not listed in the click handler's
	# `inputs`, so its value is never passed to `inference`.
	control_context_scale = gr.Slider(
	label="Context scale",
	minimum=0.0,
	maximum=1.0,
	step=0.01,
	value=0.75,
	)

	with gr.Row():
	guidance_scale = gr.Slider(
	label="Guidance scale",
	minimum=0.0,
	maximum=10.0,
	step=0.1,
	value=1.0,
	)

	# Seed slider; used both as an input and as an output of the click handler.
	seed = gr.Slider(
	label="Seed",
	minimum=0,
	maximum=MAX_SEED,
	step=1,
	value=42,
	)
	randomize_seed = gr.Checkbox(label="Randomize seed", value=False)

	with gr.Column():
	# Image component listed as the first output of the click handler.
	output_image = gr.Image(label="Generated image", show_label=False)
	# polished_prompt = gr.Textbox(label="Polished prompt", interactive=False)

	# with gr.Accordion("Preprocessor output", open=False):
	# control_image = gr.Image(label="Control image", show_label=False)


	# gr.Examples(examples=examples, inputs=[input_image])
	# Footer text is read from a static Markdown file at import time.
	gr.Markdown(read_file("static/footer.md"))

	# Wire the button to `inference`. The inputs are listed in the same order as
	# the function's first six parameters; the handler must return one value per
	# component listed in `outputs` (image first, then seed).
	run_button.click(
	fn=inference,
	inputs=[
	prompt,
	negative_prompt,
	seed,
	randomize_seed,
	guidance_scale,
	num_inference_steps,
	],
	outputs=[output_image, seed],
	)


	if __name__ == "__main__":
	    # Start the app only when this file is executed as a script; the MCP
	    # server option is enabled and the page CSS defined above is passed in.
	    demo.launch(
	        mcp_server=True,
	        css=css,
	    )