import torch
from diffusers import StableDiffusionXLPipeline
import gradio as gr

# Base model identifier (passed to StableDiffusionXLPipeline.from_pretrained).
BASE_MODEL = "John6666/animagine-xl-40-v4opt-sdxl"
# Example LoRA identifier (passed to load_lora_weights).
LORA_MODEL = "Leemonzz/ROSPRITE"

# Device selection: use CUDA when torch reports it as available, else the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Usando dispositivo: {device}")

# dtype and memory settings derived from the device:
# half precision on CUDA, full precision on CPU.
dtype = {"cuda": torch.float16, "cpu": torch.float32}[device]
# The low-CPU-memory loading flag is enabled only when running on the CPU.
low_mem = device == "cpu"

# Load the base SDXL pipeline with the dtype chosen above and move it to the
# selected device.
print("Cargando modelo base...")
pipe = StableDiffusionXLPipeline.from_pretrained(
    BASE_MODEL,
    torch_dtype=dtype,
    # NOTE(review): asking for the "fp16" variant presumably requires the
    # repository to publish fp16-variant weight files -- confirm for BASE_MODEL.
    variant="fp16" if device == "cuda" else None,
    use_safetensors=True,
    # NOTE(review): this is False on CUDA; confirm that disabling the
    # low-memory loading path on GPU machines is intentional.
    low_cpu_mem_usage=low_mem,
).to(device)

# Load the LoRA and fuse it into the pipeline with lora_scale=0.8.
print("Cargando LoRA...")
pipe.load_lora_weights(LORA_MODEL)
pipe.fuse_lora(lora_scale=0.8)
# Once fused, the separately loaded LoRA weights are redundant; unload them so
# they do not keep occupying memory for the lifetime of the app.
pipe.unload_lora_weights()

def generar(prompt):
    """Generate one image for the given prompt.

    Calls the module-level ``pipe`` with 25 inference steps inside
    ``torch.inference_mode()``.

    Args:
        prompt: Prompt forwarded unchanged to the pipeline.

    Returns:
        The first element of the pipeline result's ``images``.
    """
    # Sampling never needs gradients, so run with autograd tracking disabled.
    with torch.inference_mode():
        resultado = pipe(prompt, num_inference_steps=25)
    return resultado.images[0]

# Gradio UI: one text box as input, one image as output, wired to `generar`.
demo = gr.Interface(
    generar,
    gr.Textbox(placeholder="Escribe tu prompt aquí...", label="Prompt"),
    gr.Image(label="Imagen generada"),
    title="Generador con LoRA (CPU/GPU)",
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()