# Source: Hugging Face Space file "app.py" (raw view, 19 kB).
# Last commit: "Update app.py" (9a223bb, verified) by akhaliq (HF Staff).
import gradio as gr
import numpy as np
import torch, random, json, spaces, time
from ulid import ULID
from diffsynth.pipelines.qwen_image import (
QwenImagePipeline, ModelConfig,
QwenImageUnit_Image2LoRAEncode, QwenImageUnit_Image2LoRADecode
)
from safetensors.torch import save_file
import torch
from PIL import Image
from utils import repo_utils, image_utils, prompt_utils
# repo_utils.clone_repo_if_not_exists("git clone https://huggingface.co/DiffSynth-Studio/General-Image-Encoders", "app/repos")
# repo_utils.clone_repo_if_not_exists("https://huggingface.co/apple/starflow", "app/models")
# Public base URL of this Space's repository files.
URL_PUBLIC = "https://huggingface.co/spaces/AiSudo/Qwen-Image-to-LoRA/blob/main"
# Default tensor dtype used by the pipelines below.
DTYPE = torch.bfloat16
# Largest value accepted for a user-supplied seed (max signed 32-bit int).
MAX_SEED = np.iinfo(np.int32).max

# VRAM management settings for the image-to-LoRA models: both the offload and
# the onload stage are set to "disk", while preparation and computation use
# bfloat16 on CUDA.
vram_config_disk_offload = {
    **dict.fromkeys(
        ("offload_dtype", "offload_device", "onload_dtype", "onload_device"),
        "disk",
    ),
    "preparing_dtype": DTYPE,
    "preparing_device": "cuda",
    "computation_dtype": DTYPE,
    "computation_device": "cuda",
}
# Load models
# Image-to-LoRA pipeline: two image encoders plus the Qwen-Image-i2L style
# checkpoint, each configured with the disk-offload VRAM settings.
pipe_lora = QwenImagePipeline.from_pretrained(
    torch_dtype=torch.bfloat16,
    device="cuda",
    model_configs=[
        ModelConfig(
            download_source="huggingface",
            model_id=repo_id,
            origin_file_pattern=weights_file,
            **vram_config_disk_offload,
        )
        for repo_id, weights_file in (
            ("DiffSynth-Studio/General-Image-Encoders", "SigLIP2-G384/model.safetensors"),
            ("DiffSynth-Studio/General-Image-Encoders", "DINOv3-7B/model.safetensors"),
            ("DiffSynth-Studio/Qwen-Image-i2L", "Qwen-Image-i2L-Style.safetensors"),
        )
    ],
    processor_config=ModelConfig(
        model_id="Qwen/Qwen-Image-Edit",
        origin_file_pattern="processor/",
    ),
    # Second element of mem_get_info is the device's total memory in bytes;
    # convert to GiB and keep 0.5 GiB of headroom.
    vram_limit=torch.cuda.mem_get_info("cuda")[1] / (1024 ** 3) - 0.5,
)
# VRAM management settings for the image-generation models: weights are
# offloaded to disk, and onloaded / prepared / computed in bfloat16 on CUDA.
vram_config = dict(
    offload_dtype="disk",
    offload_device="disk",
    onload_dtype=torch.bfloat16,
    onload_device="cuda",
    preparing_dtype=torch.bfloat16,
    preparing_device="cuda",
    computation_dtype=torch.bfloat16,
    computation_device="cuda",
)
# Text-to-image pipeline: the Qwen-Image transformer, text encoder and VAE,
# all fetched from the same Hugging Face repository with `vram_config`.
pipe_imagen = QwenImagePipeline.from_pretrained(
    torch_dtype=torch.bfloat16,
    device="cuda",
    model_configs=[
        ModelConfig(
            download_source="huggingface",
            model_id="Qwen/Qwen-Image",
            origin_file_pattern=weights_pattern,
            **vram_config,
        )
        for weights_pattern in (
            "transformer/diffusion_pytorch_model*.safetensors",
            "text_encoder/model*.safetensors",
            "vae/diffusion_pytorch_model.safetensors",
        )
    ],
    tokenizer_config=ModelConfig(
        download_source="huggingface",
        model_id="Qwen/Qwen-Image",
        origin_file_pattern="tokenizer/",
    ),
    # Second element of mem_get_info is the device's total memory in bytes;
    # convert to GiB and keep 0.5 GiB of headroom.
    vram_limit=torch.cuda.mem_get_info("cuda")[1] / (1024 ** 3) - 0.5,
)
@spaces.GPU
def generate_lora(
    input_images,
    progress=gr.Progress(track_tqdm=True),
):
    """Encode the uploaded images into a LoRA and save it as a safetensors file.

    Args:
        input_images: Gallery value; every item is unpacked as
            ``(filepath, caption)`` and only the file path is used.
        progress: Gradio progress tracker (tracks tqdm bars).

    Returns:
        A 3-tuple matching the click handler's three outputs: the LoRA file
        name, an update that enables the download button and points it at the
        saved file, and an update that enables the image-generation button.

    Raises:
        gr.Error: If no input images were provided.
    """
    import os  # local import so this block does not depend on file-level imports

    ulid = str(ULID()).lower()[:12]
    print(f"ulid: {ulid}")

    if not input_images:
        print("images are empty.")
        # The handler is wired to three outputs, so returning a bare ``False``
        # cannot be mapped onto them; raise a readable error instead.
        raise gr.Error("Please upload at least one image before generating a LoRA.")

    # Close every file handle as soon as the pixel data has been converted.
    images = []
    for filepath, _ in input_images:
        with Image.open(filepath) as img:
            images.append(img.convert("RGB"))

    # Model inference
    with torch.no_grad():
        embs = QwenImageUnit_Image2LoRAEncode().process(pipe_lora, image2lora_images=images)
        lora = QwenImageUnit_Image2LoRADecode().process(pipe_lora, **embs)["lora"]

    lora_name = f"{ulid}.safetensors"
    lora_path = f"loras/{lora_name}"
    # Make sure the output directory exists before writing into it.
    os.makedirs("loras", exist_ok=True)
    save_file(lora, lora_path)

    return lora_name, gr.update(interactive=True, value=lora_path), gr.update(interactive=True)
@spaces.GPU
def generate_image(
    lora_name,
    prompt,
    negative_prompt="blurry ugly bad",
    width=1024,
    height=1024,
    seed=42,
    randomize_seed=True,
    guidance_scale=3.5,
    num_inference_steps=8,
    progress=gr.Progress(track_tqdm=True),
):
    """Generate an image with the Qwen-Image pipeline using a saved LoRA.

    Args:
        lora_name: File name of a LoRA previously written to ``loras/``.
        prompt: Text prompt.
        negative_prompt: Text describing what to avoid.
        width: Output width in pixels.
        height: Output height in pixels.
        seed: Seed used when ``randomize_seed`` is false.
        randomize_seed: When true, draw a fresh seed in ``[0, MAX_SEED]``.
        guidance_scale: Classifier-free guidance strength, forwarded to the
            pipeline as ``cfg_scale``.
        num_inference_steps: Number of denoising steps.
        progress: Gradio progress tracker (tracks tqdm bars).

    Returns:
        ``(image, seed)`` where ``seed`` is the seed actually used.

    Raises:
        gr.Error: If ``lora_name`` is empty or the LoRA file does not exist.
    """
    import os  # local import so this block does not depend on file-level imports

    if not lora_name:
        raise gr.Error("Generate a LoRA before generating an image.")

    # The endpoint is publicly callable, so keep only the file-name component
    # to prevent the path from escaping the ``loras/`` directory.
    lora_file = os.path.basename(str(lora_name).strip())
    lora_path = f"loras/{lora_file}"
    if not os.path.isfile(lora_path):
        raise gr.Error(f"LoRA file not found: {lora_file}")

    pipe_imagen.clear_lora()
    pipe_imagen.load_lora(pipe_imagen.dit, lora_path)

    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    # UI sliders may deliver floats; the pipeline expects integers here.
    seed = int(seed)

    # Pass the seed and guidance scale through DiffSynth's own keyword names
    # (`seed`, `cfg_scale`); previously both were computed but never used, so
    # the returned seed did not describe the generated image.
    output_image = pipe_imagen(
        prompt=prompt,
        negative_prompt=negative_prompt,
        cfg_scale=float(guidance_scale),
        seed=seed,
        num_inference_steps=int(num_inference_steps),
        width=int(width),
        height=int(height),
    )
    return output_image, seed
def read_file(path: str) -> str:
    """Return the full text of the file at *path*, decoded as UTF-8."""
    with open(path, 'r', encoding='utf-8') as handle:
        return handle.read()
# Enhanced Apple-style CSS - more minimalist and clean
# Custom stylesheet handed to `demo.launch(css=css)` at the bottom of the file.
# It styles the page container, header, `.section-card` columns, buttons,
# inputs, gallery/image outputs, accordion, download button, checkbox,
# examples, labels, two mobile breakpoints (768px / 480px) and scrollbars.
# Note: the rule under "Remove all animations" does not remove transitions; it
# limits every element to 0.15s background/border-color/color transitions.
# NOTE(review): most selectors target `.gr-*` class names - confirm they match
# the DOM emitted by the installed Gradio version.
css = """
/* Pure Apple Design System */
.gradio-container {
font-family: -apple-system, BlinkMacSystemFont, "SF Pro Display", "SF Pro Text", "Helvetica Neue", Helvetica, Arial, sans-serif !important;
background: #ffffff !important;
color: #1d1d1f !important;
line-height: 1.47059 !important;
font-weight: 400 !important;
letter-spacing: -.022em !important;
}
#col-container {
margin: 0 auto;
max-width: 980px;
padding: 40px 20px;
}
/* Ultra-minimalist header */
.gradio-container .gr-block-header {
background: transparent !important;
border: none !important;
padding: 0 !important;
margin-bottom: 60px !important;
box-shadow: none !important;
}
.gradio-container h1 {
font-weight: 600 !important;
font-size: 3rem !important;
color: #1d1d1f !important;
text-align: center !important;
margin-bottom: 16px !important;
letter-spacing: -.003em !important;
}
.gradio-container .subtitle {
font-size: 1.25rem !important;
font-weight: 400 !important;
color: #6e6e73 !important;
text-align: center !important;
margin-bottom: 8px !important;
line-height: 1.4 !important;
}
/* Clean card sections */
.section-card {
background: #f2f2f7 !important;
border-radius: 18px !important;
padding: 32px !important;
margin-bottom: 32px !important;
border: none !important;
box-shadow: none !important;
}
/* Apple-style buttons */
.gradio-container .gr-button {
background: #007aff !important;
border: none !important;
border-radius: 8px !important;
color: white !important;
font-weight: 500 !important;
font-size: 17px !important;
padding: 16px 32px !important;
min-height: 44px !important;
transition: all 0.15s ease !important;
box-shadow: none !important;
letter-spacing: -.022em !important;
}
.gradio-container .gr-button:hover {
background: #0051d5 !important;
transform: none !important;
box-shadow: none !important;
}
.gradio-container .gr-button:active {
background: #004bb8 !important;
transform: scale(0.98) !important;
}
/* Clean input fields */
.gradio-container .gr-textbox,
.gradio-container .gr-slider {
background: #ffffff !important;
border: 1px solid #d2d2d7 !important;
border-radius: 10px !important;
padding: 12px 16px !important;
font-size: 17px !important;
color: #1d1d1f !important;
transition: all 0.15s ease !important;
min-height: 44px !important;
}
.gradio-container .gr-textbox:focus,
.gradio-container .gr-slider:focus {
border-color: #007aff !important;
box-shadow: 0 0 0 3px rgba(0, 122, 255, 0.1) !important;
outline: none !important;
}
/* Gallery styling */
.gradio-container .gr-gallery {
border-radius: 12px !important;
border: 1px solid #d2d2d7 !important;
background: #ffffff !important;
overflow: hidden !important;
}
/* Image output */
.gradio-container .gr-image {
border-radius: 12px !important;
border: 1px solid #d2d2d7 !important;
background: #ffffff !important;
overflow: hidden !important;
}
/* Accordion - Apple style */
.gradio-container .gr-accordion {
background: #f2f2f7 !important;
border: none !important;
border-radius: 12px !important;
padding: 0 !important;
margin-top: 24px !important;
}
.gradio-container .gr-accordion .gr-accordion-button {
background: transparent !important;
border: none !important;
padding: 16px !important;
font-weight: 500 !important;
color: #1d1d1f !important;
}
/* Download button */
.gradio-container .gr-download-button {
background: #34c759 !important;
border: none !important;
border-radius: 8px !important;
color: white !important;
font-weight: 500 !important;
font-size: 17px !important;
padding: 16px 32px !important;
min-height: 44px !important;
}
.gradio-container .gr-download-button:hover {
background: #30a14a !important;
}
/* Checkbox styling */
.gradio-container .gr-checkbox {
background: #ffffff !important;
border: 2px solid #d2d2d7 !important;
border-radius: 6px !important;
width: 24px !important;
height: 24px !important;
}
.gradio-container .gr-checkbox.checked {
background: #007aff !important;
border-color: #007aff !important;
}
/* Examples section */
.gradio-container .gr-examples {
background: #f2f2f7 !important;
border-radius: 18px !important;
padding: 24px !important;
border: none !important;
}
/* Label styling */
.gradio-container .gr-label {
font-weight: 600 !important;
color: #1d1d1f !important;
font-size: 17px !important;
margin-bottom: 8px !important;
letter-spacing: -.022em !important;
}
/* Mobile responsiveness */
@media (max-width: 768px) {
#col-container {
padding: 20px 16px !important;
max-width: 100% !important;
}
.gradio-container h1 {
font-size: 2rem !important;
margin-bottom: 12px !important;
}
.gradio-container .subtitle {
font-size: 1.1rem !important;
}
.section-card {
padding: 24px !important;
margin-bottom: 24px !important;
}
.gradio-container .gr-button {
padding: 14px 28px !important;
font-size: 16px !important;
}
.gradio-container .gr-gallery {
height: 200px !important;
columns: 2 !important;
}
.gradio-container .gr-row {
flex-direction: column !important;
gap: 20px !important;
}
}
@media (max-width: 480px) {
.gradio-container h1 {
font-size: 1.75rem !important;
}
.section-card {
padding: 20px !important;
}
.gradio-container .gr-gallery {
height: 180px !important;
columns: 1 !important;
}
}
/* Remove all animations for Apple-like snappiness */
.gradio-container * {
transition: background 0.15s ease, border-color 0.15s ease, color 0.15s ease !important;
}
/* Hide unnecessary elements */
.gradio-container .gr-footer,
.gradio-container .gr-header {
display: none !important;
}
/* Clean scrollbar */
.gradio-container ::-webkit-scrollbar {
width: 3px !important;
}
.gradio-container ::-webkit-scrollbar-track {
background: transparent !important;
}
.gradio-container ::-webkit-scrollbar-thumb {
background: #d2d2d7 !important;
border-radius: 3px !important;
}
.gradio-container ::-webkit-scrollbar-thumb:hover {
background: #007aff !important;
}
"""
# Load the example inputs shown in the "Examples" section of the UI.
# The encoding is given explicitly so decoding does not depend on the host
# locale (JSON interchange is UTF-8), matching how read_file opens text files.
with open('examples/0_examples.json', 'r', encoding='utf-8') as file:
    examples = json.load(file)
print(examples)
# Gradio 6 syntax - no parameters in Blocks()
# Builds the UI (header, LoRA generation row, image generation card, examples,
# footer) and wires the two buttons to generate_lora / generate_image.
# NOTE(review): the container nesting below is reconstructed from statement
# order - confirm it matches the intended layout.
with gr.Blocks() as demo:
    with gr.Column(elem_id="col-container"):
        with gr.Column():
            # Ultra-minimalist Apple-style header
            gr.HTML("""
<div style="text-align: center; max-width: 600px; margin: 0 auto;">
<h1>Qwen Image to LoRA</h1>
<p class="subtitle">Generate custom LoRA models from your images</p>
<p style="font-size: 14px; color: #86868b; margin-top: 16px;">
Demo by <a href="https://aisudo.com/" target="_blank" style="color: #007aff; text-decoration: none;">AiSudo</a> •
<a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank" style="color: #007aff; text-decoration: none;">Built with anycoder</a>
</p>
</div>
""")
        with gr.Row():
            # Card: image upload and the button that triggers LoRA generation.
            with gr.Column(elem_classes=["section-card"]):
                input_images = gr.Gallery(
                    label="Input Images",
                    file_types=["image"],
                    show_label=True,
                    columns=2,
                    object_fit="cover",
                    height=250)
                lora_button = gr.Button("Generate LoRA", size="lg")
            # Card: name of the generated LoRA and its download button.
            with gr.Column(elem_classes=["section-card"]):
                lora_name = gr.Textbox(
                    label="Generated LoRA",
                    lines=2,
                    interactive=False,
                    placeholder="Your LoRA will appear here..."
                )
                # Starts disabled; generate_lora returns an update that enables it.
                lora_download = gr.DownloadButton(
                    label="Download LoRA",
                    interactive=False,
                    size="lg"
                )
        with gr.Column(elem_classes=["section-card"]) as imagen_container:
            gr.Markdown("### Generate Images")
            with gr.Row():
                with gr.Column():
                    prompt = gr.Textbox(
                        label="Prompt",
                        lines=2,
                        placeholder="Describe what you want to generate...",
                        value="a person in a fishing boat.",
                    )
                    # Starts disabled; generate_lora returns an update that enables it.
                    imagen_button = gr.Button("Generate Image", interactive=False, size="lg")
                    # The values below are passed explicitly to generate_image,
                    # so they take precedence over that function's own defaults.
                    with gr.Accordion("Settings", open=False):
                        negative_prompt = gr.Textbox(
                            label="Negative Prompt",
                            lines=1,
                            placeholder="What to avoid...",
                            value="blurry, low quality"
                        )
                        num_inference_steps = gr.Slider(
                            label="Steps",
                            minimum=1,
                            maximum=50,
                            step=1,
                            value=25,
                        )
                        guidance_scale = gr.Slider(
                            label="Guidance Scale",
                            minimum=1.0,
                            maximum=10.0,
                            step=0.1,
                            value=3.5,
                        )
                        with gr.Row():
                            width = gr.Slider(
                                label="Width",
                                minimum=512,
                                maximum=1280,
                                step=32,
                                value=768,
                            )
                            height = gr.Slider(
                                label="Height",
                                minimum=512,
                                maximum=1280,
                                step=32,
                                value=1024,
                            )
                        seed = gr.Slider(
                            label="Seed",
                            minimum=0,
                            maximum=MAX_SEED,
                            step=1,
                            value=42,
                        )
                        randomize_seed = gr.Checkbox(label="Randomize Seed", value=False)
                with gr.Column():
                    output_image = gr.Image(
                        label="Generated Image",
                        height=350
                    )
        # Example inputs loaded from examples/0_examples.json fill the gallery.
        gr.Examples(
            examples=examples,
            inputs=[input_images],
            label="Examples"
        )
        gr.Markdown(read_file("static/footer.md"))
    # generate_lora's three return values map onto these three outputs.
    lora_button.click(
        fn=generate_lora,
        inputs=[
            input_images
        ],
        outputs=[lora_name, lora_download, imagen_button],
        api_visibility="public"
    )
    # Inputs are listed in the same order as generate_image's parameters; the
    # returned seed is written back to the seed slider.
    imagen_button.click(
        fn=generate_image,
        inputs=[
            lora_name,
            prompt,
            negative_prompt,
            width,
            height,
            seed,
            randomize_seed,
            guidance_scale,
            num_inference_steps,
        ],
        outputs=[output_image, seed],
        api_visibility="public"
    )
if __name__ == "__main__":
    # Gradio 6 syntax - all launch parameters (css, theme, footer links) go here
    # rather than into gr.Blocks().

    # Font stacks: a Google font first, then generic fallbacks.
    ui_fonts = [
        gr.themes.GoogleFont("Inter"),
        "ui-sans-serif",
        "system-ui",
        "sans-serif",
    ]
    mono_fonts = [
        gr.themes.GoogleFont("JetBrains Mono"),
        "ui-monospace",
        "Consolas",
        "monospace",
    ]

    # Colour, border and radius overrides applied on top of the Base theme.
    theme_overrides = {
        "body_background_fill": "white",
        "body_background_fill_dark": "#000000",
        "button_primary_background_fill": "#007aff",
        "button_primary_background_fill_hover": "#0051d5",
        "button_primary_text_color": "white",
        "button_secondary_background_fill": "#f2f2f7",
        "button_secondary_background_fill_hover": "#e5e5ea",
        "block_background_fill": "white",
        "block_background_fill_dark": "#1c1c1e",
        "block_border_width": "0px",
        "block_border_color": "#d2d2d7",
        "block_border_color_dark": "#38383a",
        "block_radius": "18px",
        "block_shadow": "none",
        "block_label_text_color": "#1d1d1f",
        "block_label_text_color_dark": "#f2f2f2",
        "block_title_text_color": "#1d1d1f",
        "block_title_text_color_dark": "#f2f2f2",
        "input_background_fill": "white",
        "input_background_fill_dark": "#1c1c1e",
        "input_border_color": "#d2d2d7",
        "input_border_color_dark": "#38383a",
        "input_border_width": "1px",
        "input_radius": "10px",
        "input_shadow": "none",
        "input_text_color": "#1d1d1f",
        "input_text_color_dark": "#f2f2f2",
    }
    app_theme = gr.themes.Base(
        primary_hue="blue",
        secondary_hue="gray",
        neutral_hue="gray",
        font=ui_fonts,
        font_mono=mono_fonts,
    ).set(**theme_overrides)

    demo.launch(
        css=css,  # Moved from Blocks() to launch() - Gradio 6 syntax
        mcp_server=True,
        theme=app_theme,
        footer_links=[
            {"label": "Built with anycoder", "url": "https://huggingface.co/spaces/akhaliq/anycoder"}
        ],
    )