TextPecker
Collection
Rewarding Structural Anomaly Quantification for Enhancing Visual Text Rendering
•
7 items
•
Updated
•
1
This model was trained using Flow-GRPO with LoRA. Only the LoRA weights are provided here, so you will need to download the Qwen-Image base model first.
import os
import torch
from diffusers import DiffusionPipeline
from safetensors.torch import load_file
from peft import LoraConfig, get_peft_model
# Force the pure-Python protobuf implementation — NOTE(review): presumably a
# workaround for a binary-extension/ABI mismatch in this environment; confirm
# it is still required.
os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"
# NOTE(review): appears to disable diffusers' native/fused attention path;
# verify this flag against the diffusers version actually installed.
os.environ["DIFFUSERS_DISABLE_NATIVE_ATTENTION"] = "1"
def load_model(model_path, ckpt_path=None, use_lora=True):
    """Build the diffusion pipeline and optionally attach LoRA weights.

    Args:
        model_path: Hugging Face id or local directory of the base model.
        ckpt_path: Path to a LoRA ``.safetensors`` checkpoint, or ``None``
            to run the plain base model.
        use_lora: When ``False`` the checkpoint is ignored even if given.

    Returns:
        The pipeline, loaded in bfloat16 and moved to CUDA.
    """
    pipe = DiffusionPipeline.from_pretrained(
        model_path,
        torch_dtype=torch.bfloat16,
    ).to("cuda")
    # Demo/benchmark usage: drop the safety checker entirely.
    pipe.safety_checker = None

    if use_lora and ckpt_path is not None:
        # Adapters are injected into the transformer's attention and MLP
        # projection layers only.
        lora_targets = [
            "attn.to_k", "attn.to_q", "attn.to_v", "attn.to_out.0",
            "attn.add_k_proj", "attn.add_q_proj", "attn.add_v_proj", "attn.to_add_out",
            "img_mlp.net.0.proj", "img_mlp.net.2",
            "txt_mlp.net.0.proj", "txt_mlp.net.2",
        ]
        lora_cfg = LoraConfig(
            r=64,
            lora_alpha=128,
            init_lora_weights="gaussian",
            target_modules=lora_targets,
        )
        pipe.transformer = get_peft_model(pipe.transformer, lora_cfg)
        # strict=False: the checkpoint holds only the LoRA tensors, not the
        # full transformer state dict.
        lora_state = load_file(ckpt_path, device="cpu")
        pipe.transformer.load_state_dict(lora_state, strict=False)
        print(f"successfully load lora: {ckpt_path}")
    return pipe
# --- Configuration ---------------------------------------------------------
model_id = "Qwen/Qwen-Image"
# NOTE(review): this looks like a Hugging Face repo id, but load_model() feeds
# it to safetensors' load_file(), which expects a local file path — download
# the LoRA checkpoint first and point this at the resulting .safetensors file.
lora_ckpt_path = "CIawevy/QwenImage-TextPecker-SQPA"
device = "cuda"
negative_prompt = " "

# Supported output resolutions, keyed by aspect ratio.
aspect_ratios = {
    "1:1": (1328, 1328),
    "16:9": (1664, 928),
    "9:16": (928, 1664),
}
width, height = aspect_ratios["1:1"]

num_inference_steps = 50
true_cfg_scale = 4.0

# --- Generation --------------------------------------------------------------
pipe = load_model(model_id, lora_ckpt_path)

prompt = 'a weathered cave explorers journal page, with the phrase "TextPecker" prominently written in faded ink, surrounded by sketches of ancient ruins and cryptic symbols, under a dim, mystical light.'

# Fixed seed for reproducible output.
result = pipe(
    prompt=prompt,
    negative_prompt=negative_prompt,
    width=width,
    height=height,
    num_inference_steps=num_inference_steps,
    true_cfg_scale=true_cfg_scale,
    generator=torch.Generator(device=device).manual_seed(42),
)
image = result.images[0]

image.save("TextPecker_qwen_demo.png")
print("img has been saved to: TextPecker_qwen_demo.png")
Base model
Qwen/Qwen-Image