Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,30 +1,136 @@
|
|
| 1 |
-
import
|
| 2 |
import torch
|
| 3 |
-
|
|
|
|
| 4 |
from PIL import Image
|
| 5 |
-
import
|
| 6 |
-
import
|
| 7 |
-
|
| 8 |
-
#
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
-
#
|
| 19 |
-
|
| 20 |
|
| 21 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
|
|
|
|
|
|
|
| 30 |
demo.launch()
|
|
|
|
| 1 |
+
import os
|
| 2 |
import torch
|
| 3 |
+
import math
|
| 4 |
+
import gradio as gr
|
| 5 |
from PIL import Image
|
| 6 |
+
from diffusers import QwenImageEditPlusPipeline, FlowMatchEulerDiscreteScheduler
|
| 7 |
+
from huggingface_hub import WebhooksServer
|
| 8 |
+
|
| 9 |
+
# Global configuration: Hub repository ids of the base model and the LoRAs.
BASE_MODEL = "Qwen/Qwen-Image-Edit-2509"
LORA_COSPLAY = "joyfox/Qwen-Image-Edit-Cosplay"
LORA_LIGHTNING = "lightx2v/Qwen-Image-Lightning"

# Scheduler configuration for few-step (Lightning) inference.
# "base_shift" and "max_shift" are both log(3): with dynamic shifting enabled
# the scheduler interpolates between the two by image sequence length, so
# equal values pin the shift to 3 at every resolution. Omitting "base_shift"
# would leave it at the scheduler's own default and make the effective shift
# depend on image size.
scheduler_config = {
    "base_image_seq_len": 256,
    "base_shift": math.log(3),
    "invert_sigmas": False,
    "max_image_seq_len": 8192,
    "max_shift": math.log(3),
    "num_train_timesteps": 1000,
    "shift": 1.0,
    "shift_terminal": None,
    "stochastic_sampling": False,
    "time_shift_type": "exponential",
    "use_beta_sigmas": False,
    "use_dynamic_shifting": True,
    "use_exponential_sigmas": False,
    "use_karras_sigmas": False,
}

print("Cargando planificador de flujo de acoplamiento...")
scheduler = FlowMatchEulerDiscreteScheduler.from_config(scheduler_config)
|
| 33 |
+
|
| 34 |
+
print("Inicializando tubería multireferencia QwenImageEditPlusPipeline...")
# Build the multi-reference edit pipeline around the custom scheduler,
# loading the weights in bfloat16.
pipeline_options = dict(scheduler=scheduler, torch_dtype=torch.bfloat16)
pipe = QwenImageEditPlusPipeline.from_pretrained(BASE_MODEL, **pipeline_options)

# Move the instantiated pipeline onto the GPU.
pipe.to("cuda")

# Registry recording which lazily loaded LoRA adapters are already in place;
# the "cosplay" key is set once the adapters have been loaded and activated.
loaded_adapters = dict()
|
| 46 |
+
|
| 47 |
+
def load_cosplay_adapters_on_demand():
    """Lazily load and activate the cosplay and Lightning LoRA adapters.

    The first successful call loads both LoRAs into the module-level
    ``pipe``, activates them with their blend weights and records completion
    under ``loaded_adapters["cosplay"]``. Subsequent calls return immediately.

    Raises:
        Exception: whatever the underlying load/activation call raised. Any
            LoRA weights loaded so far are unloaded first, so a later retry
            starts from a clean pipeline instead of trying to register an
            adapter name that is already taken.
    """
    if "cosplay" in loaded_adapters:
        return

    try:
        print("Carga perezosa activada: Descargando pesos de joyfox/Qwen-Image-Edit-Cosplay...")
        pipe.load_lora_weights(LORA_COSPLAY, adapter_name="cosplay")

        print("Cargando adaptador de velocidad Qwen-Image-Lightning...")
        pipe.load_lora_weights(
            LORA_LIGHTNING,
            weight_name="Qwen-Image-Lightning-8steps-V1.0.safetensors",
            adapter_name="lightning",
        )

        # Blend weights between the speed adapter and the specialised one.
        pipe.set_adapters(["cosplay", "lightning"], adapter_weights=[0.85, 1.0])
    except Exception:
        # Roll back partially loaded adapters; the flag below stays unset so
        # the next request retries the whole sequence.
        pipe.unload_lora_weights()
        raise

    loaded_adapters["cosplay"] = True
    print("Adaptadores fusionados y listos en GPU.")
|
| 64 |
+
|
| 65 |
+
def _fit_to_multiple_of_8(width, height, max_dimension=1024):
    """Return ``(width, height)`` capped at ``max_dimension`` and aligned to 8.

    The longer side is scaled down to ``max_dimension`` when it exceeds it
    (aspect ratio preserved), then both sides are floored to a multiple of 8.
    Integer arithmetic is used so the long side lands exactly on the cap
    rather than one step below it through float rounding. Each side is
    clamped to at least 8 so very small inputs never yield a zero dimension.
    """
    longest = max(width, height)
    if longest > max_dimension:
        width = width * max_dimension // longest
        height = height * max_dimension // longest
    return max(8, width // 8 * 8), max(8, height // 8 * 8)


def process_cosplay_edit(base_image, reference_image, prompt, steps, cfg, seed):
    """Transfer the outfit of ``reference_image`` onto the person in ``base_image``.

    Args:
        base_image: PIL image of the real person (image 1).
        reference_image: PIL image of the anime character (image 2).
        prompt: Edit instruction passed to the pipeline.
        steps: Number of inference steps; converted with ``int``.
        cfg: True CFG scale; converted with ``float``.
        seed: Seed for the CUDA generator; ``None`` (an emptied number field)
            falls back to 0.

    Returns:
        The first generated image. The Gradio event wires this function to a
        single ``gr.Image`` output, so one image is returned, not the list.

    Raises:
        gr.Error: if either input image is missing.
    """
    if base_image is None or reference_image is None:
        raise gr.Error("Es obligatorio proporcionar tanto la imagen humana base como la referencia de anime.")

    # Load the adapters only when an inference is actually requested.
    load_cosplay_adapters_on_demand()

    # Both images are resized to the base image's capped, 8-aligned size.
    # NOTE(review): this stretches the reference image when its aspect ratio
    # differs from the base image's - confirm that is intended.
    width, height = _fit_to_multiple_of_8(*base_image.size)
    target_size = (width, height)
    base_resized = base_image.resize(target_size, Image.Resampling.LANCZOS)
    ref_resized = reference_image.resize(target_size, Image.Resampling.LANCZOS)

    seed_value = 0 if seed is None else int(seed)
    generator = torch.Generator("cuda").manual_seed(seed_value)

    # Run the pipeline with autograd tracking disabled.
    with torch.inference_mode():
        output = pipe(
            image=[base_resized, ref_resized],
            prompt=prompt,
            negative_prompt=" ",
            width=width,
            height=height,
            num_inference_steps=int(steps),
            true_cfg_scale=float(cfg),
            generator=generator,
        )

    return output.images[0]
|
| 102 |
+
|
| 103 |
+
# Build the Gradio user interface and its backend API.
# delete_cache=(300, 300) cleans up temporary files every 5 minutes.
with gr.Blocks(delete_cache=(300, 300), theme=gr.themes.Default()) as demo:
    gr.Markdown("# **Estudio de Transferencia de Vestimenta Cosplay (Qwen-Image-Edit)**")
    gr.Markdown("Servicio REST y de interfaz para la clonación semántica de atuendos anime sobre personas reales.")

    with gr.Row():
        # Left column: the two input images, the prompt and the tuning knobs.
        with gr.Column():
            base_input = gr.Image(label="Persona Real (Imagen 1)", type="pil")
            ref_input = gr.Image(label="Personaje de Anime (Imagen 2)", type="pil")
            prompt_input = gr.Textbox(
                label="Instrucción de Edición Semántica",
                value="把图2中动漫人物中的发型,服饰和装扮道具迁移到图1的真实人物,图1真实人物保持高度人脸一致性",
                lines=3,
            )
            with gr.Accordion("Parámetros del Transformador", open=False):
                inference_steps = gr.Slider(
                    minimum=4,
                    maximum=24,
                    value=8,
                    step=1,
                    label="Pasos de Inferencia (Lightning)",
                )
                cfg_scale = gr.Slider(
                    minimum=1.0,
                    maximum=8.0,
                    value=1.5,
                    step=0.5,
                    label="True CFG Scale",
                )
                seed_num = gr.Number(value=1337, label="Semilla Aleatoria")

            generate_btn = gr.Button("Ejecutar Edición", variant="primary")

        # Right column: the edited result.
        with gr.Column():
            output_display = gr.Image(label="Imagen Resultante")

    # Component order here matches the parameter order of process_cosplay_edit.
    generate_btn.click(
        fn=process_cosplay_edit,
        inputs=[
            base_input,
            ref_input,
            prompt_input,
            inference_steps,
            cfg_scale,
            seed_num,
        ],
        outputs=output_display,
    )

# Start the Gradio application with request queuing enabled.
demo.queue()
demo.launch()
|