import gradio as gr
import numpy as np
import random
import requests
import base64
from PIL import Image
import io
import json
import os
import sys
import time

# IMPORTANT: required for ZeroGPU on the Pro plan
import spaces  # ZeroGPU H200 support

from diffusers import DiffusionPipeline
import torch
from huggingface_hub import login

print("🚀 Starting application with ZeroGPU H200...")
print(f"📁 Current directory: {os.getcwd()}")
print(f"🐍 Python version: {sys.version}")

# Configure Hugging Face authentication
HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("HUGGING_FACE_HUB_TOKEN")
if HF_TOKEN:
    try:
        print(f"🔑 Token detected: {HF_TOKEN[:10]}...")
        login(token=HF_TOKEN)
        print("✅ Authenticated with Hugging Face")
    except Exception as e:
        print(f"⚠️ Authentication error: {e}")
else:
    print("⚠️ HF_TOKEN not found - gated models will not be available")
    print("💡 To use FLUX models, set the HF_TOKEN environment variable in the Space")

# Optimization for ZeroGPU H200
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"🖥️ Detected device: {device}")
print(f"🔥 CUDA available: {torch.cuda.is_available()}")

if torch.cuda.is_available():
    print(f"🎮 GPU: {torch.cuda.get_device_name(0)}")
    print(f"💾 GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB")
    print("🚀 ZeroGPU H200 detected - optimizing for maximum performance")

    # H200-optimized configuration
    torch_dtype = torch.float16  # float16 for higher throughput
    print("⚡ Using torch.float16 for H200")

    # Additional H200 optimizations
    torch.backends.cudnn.benchmark = True
    torch.backends.cuda.matmul.allow_tf32 = True
    torch.backends.cudnn.allow_tf32 = True
    print("🔧 CUDA optimizations enabled for H200")
else:
    torch_dtype = torch.float32
    print("🐌 Using torch.float32 for CPU")

# Available high-quality models (optimized for H200)
MODELS = {
    "SDXL Turbo (stabilityai/sdxl-turbo)": "stabilityai/sdxl-turbo",
    "SD Turbo (stabilityai/sd-turbo)": "stabilityai/sd-turbo",
    "SDXL Base (stabilityai/stable-diffusion-xl-base-1.0)": "stabilityai/stable-diffusion-xl-base-1.0",
    "Realistic Vision (SG161222/Realistic_Vision_V5.1_noVAE)": "SG161222/Realistic_Vision_V5.1_noVAE",
    "OpenJourney v4 (prompthero/openjourney-v4)": "prompthero/openjourney-v4",
    "Anything v3 (Linaqruf/anything-v3.0)": "Linaqruf/anything-v3.0",
    "Waifu Diffusion (hakurei/waifu-diffusion)": "hakurei/waifu-diffusion",
    "Deliberate v2 (XpucT/deliberate-v2)": "XpucT/deliberate-v2",
    "Dreamlike Diffusion (dreamlike-art/dreamlike-diffusion-1.0)": "dreamlike-art/dreamlike-diffusion-1.0",
    # Additional models optimized for H200
    "Stable Diffusion v1.5 (runwayml/stable-diffusion-v1-5)": "runwayml/stable-diffusion-v1-5",
    "Stable Diffusion v1.4 (CompVis/stable-diffusion-v1-4)": "CompVis/stable-diffusion-v1-4",
    "Midjourney Style (prompthero/openjourney)": "prompthero/openjourney",
    "Orange Mixs (WarriorMama777/OrangeMixs)": "WarriorMama777/OrangeMixs",
    "Kohaku V2.1 (KBlueLeaf/kohaku-v2.1)": "KBlueLeaf/kohaku-v2.1",
    # Advanced models that take advantage of the H200 (only repos that exist)
    "SDXL Lightning (ByteDance/SDXL-Lightning)": "ByteDance/SDXL-Lightning",
    "FLUX.1-Kontext-Dev (API External)": "api_external",
}
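# To register another checkpoint, map a display name to its Hub repo id.
# The repo id below is hypothetical and only illustrates the expected shape:
# MODELS["My Custom Model (user/my-model)"] = "user/my-model"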
print("🔓 Modelos FLUX habilitados con autenticación") print(f"📊 Total de modelos disponibles: {len(MODELS)}") else: print("🔒 Modelos FLUX deshabilitados - requiere HF_TOKEN") print(f"📊 Total de modelos disponibles: {len(MODELS)}") print("📋 Modelos cargados (optimizados para H200):") for i, (name, model_id) in enumerate(MODELS.items(), 1): status = "🔐" if "flux" in model_id.lower() or "black-forest" in model_id.lower() else "📦" gpu_opt = "⚡" if "turbo" in model_id.lower() or "lightning" in model_id.lower() else "🎨" print(f" {i:2d}. {status} {gpu_opt} {name}") # Estado del pipeline pipe = None current_model_id = None MAX_SEED = np.iinfo(np.int32).max MAX_IMAGE_SIZE = 1024 # Función para cargar el modelo optimizada para H200 def load_model(model_id): global pipe, current_model_id print(f"\n🔄 Iniciando carga del modelo: {model_id}") if pipe is None or model_id != current_model_id: try: start_time = time.time() # Determinar si usar variant fp16 basado en el modelo use_fp16_variant = False if torch.cuda.is_available(): # Solo usar fp16 variant para modelos que lo soportan fp16_supported_models = [ "stabilityai/sdxl-turbo", "stabilityai/sd-turbo", "stabilityai/stable-diffusion-xl-base-1.0", "runwayml/stable-diffusion-v1-5", "CompVis/stable-diffusion-v1-4" ] use_fp16_variant = any(model in model_id for model in fp16_supported_models) print(f"🔧 FP16 variant: {'✅ Habilitado' if use_fp16_variant else '❌ Deshabilitado'} para {model_id}") # Usar token de autenticación si está disponible if HF_TOKEN and ("flux" in model_id.lower() or "black-forest" in model_id.lower()): print(f"🔐 Cargando modelo gated: {model_id}") print(f"🔑 Usando token de autenticación...") # Para modelos FLUX, no usar variant fp16 pipe = DiffusionPipeline.from_pretrained( model_id, torch_dtype=torch_dtype, use_auth_token=HF_TOKEN, variant="fp16" if use_fp16_variant else None ) else: print(f"📦 Cargando modelo público: {model_id}") pipe = DiffusionPipeline.from_pretrained( model_id, torch_dtype=torch_dtype, variant="fp16" if use_fp16_variant else None ) load_time = time.time() - start_time print(f"⏱️ Tiempo de carga: {load_time:.2f} segundos") print(f"🚀 Moviendo modelo a dispositivo: {device}") pipe = pipe.to(device) # Optimizaciones específicas para H200 if torch.cuda.is_available(): print("🔧 Aplicando optimizaciones para H200...") # Habilitar optimizaciones de memoria (más conservadoras) if hasattr(pipe, 'enable_attention_slicing'): pipe.enable_attention_slicing() print("✅ Attention slicing habilitado") # Deshabilitar CPU offload temporalmente (causa problemas con ZeroGPU) # if hasattr(pipe, 'enable_model_cpu_offload') and "sdxl" in model_id.lower(): # pipe.enable_model_cpu_offload() # print("✅ CPU offload habilitado (modelo grande)") if hasattr(pipe, 'enable_vae_slicing'): pipe.enable_vae_slicing() print("✅ VAE slicing habilitado") # XFormers solo si está disponible y el modelo lo soporta if hasattr(pipe, 'enable_xformers_memory_efficient_attention'): # FLUX models tienen problemas con XFormers, deshabilitar if "flux" in model_id.lower() or "black-forest" in model_id.lower(): print("⚠️ XFormers deshabilitado para modelos FLUX (incompatible)") else: try: pipe.enable_xformers_memory_efficient_attention() print("✅ XFormers memory efficient attention habilitado") except Exception as e: print(f"⚠️ XFormers no disponible: {e}") print("🔄 Usando atención estándar") current_model_id = model_id print(f"✅ Modelo {model_id} cargado exitosamente") if torch.cuda.is_available(): memory_used = torch.cuda.memory_allocated() / 1024**3 memory_reserved 
= torch.cuda.memory_reserved() / 1024**3 print(f"💾 Memoria GPU utilizada: {memory_used:.2f} GB") print(f"💾 Memoria GPU reservada: {memory_reserved:.2f} GB") # Verificar si la memoria es sospechosamente baja if memory_used < 0.1: print("⚠️ ADVERTENCIA: Memoria GPU muy baja - posible problema de carga") else: print("💾 Memoria CPU") except Exception as e: print(f"❌ Error cargando modelo {model_id}: {e}") print(f"🔍 Tipo de error: {type(e).__name__}") # Intentar cargar sin variant fp16 si falló if "variant" in str(e) and "fp16" in str(e): print("🔄 Reintentando sin variant fp16...") try: pipe = DiffusionPipeline.from_pretrained( model_id, torch_dtype=torch_dtype, use_auth_token=HF_TOKEN if HF_TOKEN and ("flux" in model_id.lower() or "black-forest" in model_id.lower()) else None ) pipe = pipe.to(device) current_model_id = model_id print(f"✅ Modelo {model_id} cargado exitosamente (sin fp16 variant)") except Exception as e2: print(f"❌ Error en segundo intento: {e2}") raise e2 else: raise e else: print(f"♻️ Modelo {model_id} ya está cargado, reutilizando...") # Función para usar la API externa de FLUX.1-Kontext-Dev def use_external_api(prompt, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps): try: print("\n🌐 Conectando a API externa FLUX.1-Kontext-Dev...") print(f"📝 Prompt: {prompt[:50]}...") print(f"🔧 Parámetros: {width}x{height}, guidance={guidance_scale}, steps={num_inference_steps}") start_time = time.time() # URL de la API del Space externo api_url = "https://black-forest-labs-flux-1-kontext-dev.hf.space/api/predict/" # Crear una imagen base simple para la API (requiere input_image) print("🖼️ Creando imagen base para API...") base_image = Image.new('RGB', (width, height), color='white') img_byte_arr = io.BytesIO() base_image.save(img_byte_arr, format='PNG') img_byte_arr = img_byte_arr.getvalue() # Preparar los datos para la API print("📦 Preparando datos para API...") files = { 'data': (None, json.dumps([ base64.b64encode(img_byte_arr).decode('utf-8'), # input_image prompt, # prompt seed, # seed randomize_seed, # randomize_seed guidance_scale, # guidance_scale num_inference_steps # steps ])) } # Hacer la petición a la API print(f"🌐 Enviando petición a: {api_url}") response = requests.post(api_url, files=files, timeout=60) api_time = time.time() - start_time print(f"⏱️ Tiempo de respuesta API: {api_time:.2f} segundos") if response.status_code == 200: print("✅ Respuesta exitosa de API") result = response.json() # La API devuelve [image_data, seed] image_data = result['data'][0] new_seed = result['data'][1] print("🖼️ Decodificando imagen...") # Decodificar la imagen image_bytes = base64.b64decode(image_data.split(',')[1]) image = Image.open(io.BytesIO(image_bytes)) total_time = time.time() - start_time print(f"✅ API externa exitosa - Tiempo total: {total_time:.2f} segundos") return image, new_seed else: print(f"❌ Error de API: {response.status_code}") print(f"📄 Respuesta: {response.text[:200]}...") raise Exception(f"API Error: {response.status_code} - {response.text}") except Exception as e: print(f"❌ Error usando API externa: {e}") print(f"🔍 Tipo de error: {type(e).__name__}") # Fallback: crear una imagen de error error_image = Image.new('RGB', (width, height), color='red') return error_image, seed # Función para mostrar información del modelo seleccionado def get_model_info(model_name): model_id = MODELS.get(model_name, "") if not model_id: return "**Model Info:** Select a model to see its specific configuration recommendations." 
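# The parsing above assumes a Gradio-style JSON response from the external
# Space; the exact shape is an assumption inferred from the code, roughly:
#   {"data": ["data:image/png;base64,<encoded image>", <seed: int>], ...}
# If the Space changes its API, the result['data'] indexing must be updated.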
info = f"**Model Info:** {model_name}\n\n" # Información específica por modelo if "turbo" in model_id.lower(): info += "⚡ **Fast Model** - Optimized for speed\n" info += "• Recommended steps: 1-4\n" info += "• Guidance scale: 0.0-1.0\n" info += "• Best for: Quick iterations\n\n" elif "lightning" in model_id.lower(): info += "⚡ **Lightning Model** - Ultra fast\n" info += "• Recommended steps: 4-8\n" info += "• Guidance scale: 0.0-1.0\n" info += "• Best for: Rapid prototyping\n\n" elif "flux" in model_id.lower(): info += "🔐 **FLUX Model** - High quality\n" info += "• Recommended steps: 20-50\n" info += "• Guidance scale: 3.5-7.5\n" info += "• Best for: Professional results\n\n" elif "realistic" in model_id.lower(): info += "👤 **Realistic Model** - Photorealistic\n" info += "• Recommended steps: 25-50\n" info += "• Guidance scale: 7.5-12.0\n" info += "• Best for: Realistic portraits\n\n" elif "openjourney" in model_id.lower(): info += "🎨 **OpenJourney Model** - Midjourney style\n" info += "• Recommended steps: 20-30\n" info += "• Guidance scale: 7.5-10.0\n" info += "• Best for: Artistic styles\n\n" elif "waifu" in model_id.lower(): info += "🌸 **Waifu Model** - Anime style\n" info += "• Recommended steps: 20-30\n" info += "• Guidance scale: 7.5-10.0\n" info += "• Best for: Anime characters\n\n" elif "anything" in model_id.lower(): info += "🎭 **Anything Model** - Versatile\n" info += "• Recommended steps: 20-30\n" info += "• Guidance scale: 7.5-10.0\n" info += "• Best for: Creative concepts\n\n" else: info += "📦 **Standard Model**\n" info += "• Recommended steps: 20-50\n" info += "• Guidance scale: 7.5-12.0\n" info += "• Best for: General use\n\n" info += f"**Model ID:** `{model_id}`\n" info += "**Status:** ✅ Available" return info # @spaces.GPU #[uncomment to use ZeroGPU] @spaces.GPU def infer( prompt, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps, model_name, eta=0.0, strength=1.0, num_images_per_prompt=1, safety_checker=True, progress=gr.Progress(track_tqdm=True), ): try: print(f"\n🎨 Iniciando generación de imagen con H200...") print(f"📝 Prompt: {prompt}") print(f"🚫 Negative prompt: {negative_prompt}") print(f"🎲 Seed: {seed} (randomize: {randomize_seed})") print(f"📐 Dimensiones: {width}x{height}") print(f"🎯 Guidance scale: {guidance_scale}") print(f"🔄 Inference steps: {num_inference_steps}") print(f"🎯 Eta: {eta}") print(f"💪 Strength: {strength}") print(f"🖼️ Images per prompt: {num_images_per_prompt}") print(f"🛡️ Safety checker: {safety_checker}") print(f"🎯 Modelo seleccionado: {model_name}") start_time = time.time() # Verificar si es el modelo externo if model_name == "FLUX.1-Kontext-Dev (API External)": return use_external_api(prompt, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps) # Cargar el modelo seleccionado model_id = MODELS[model_name] print(f"🔧 Cargando modelo: {model_id}") load_model(model_id) if randomize_seed: old_seed = seed seed = random.randint(0, MAX_SEED) print(f"🎲 Seed aleatorizado: {old_seed} → {seed}") print(f"🎲 Configurando generador con seed: {seed}") generator = torch.Generator(device=device).manual_seed(seed) # RESPETAR LAS CONFIGURACIONES DEL USUARIO # Solo aplicar límites mínimos para modelos específicos si es necesario final_guidance_scale = guidance_scale final_inference_steps = num_inference_steps # Aplicar límites mínimos solo para modelos que lo requieren if "turbo" in model_id.lower(): # Para modelos turbo, asegurar al menos 1 paso if final_inference_steps < 1: 
final_inference_steps = 1 print(f"⚡ Modelo turbo - Ajustando steps mínimo: {num_inference_steps} → {final_inference_steps}") elif "lightning" in model_id.lower(): # Para modelos lightning, asegurar al menos 4 pasos if final_inference_steps < 4: final_inference_steps = 4 print(f"⚡ Modelo lightning - Ajustando steps mínimo: {num_inference_steps} → {final_inference_steps}") # Aplicar límites de guidance scale solo si es necesario if final_guidance_scale < 0.0: final_guidance_scale = 0.0 print(f"⚠️ Guidance scale ajustado al mínimo: {guidance_scale} → {final_guidance_scale}") elif final_guidance_scale > 20.0: final_guidance_scale = 20.0 print(f"⚠️ Guidance scale ajustado al máximo: {guidance_scale} → {final_guidance_scale}") print(f"⚙️ Parámetros finales (respetando configuración del usuario):") print(f" - Guidance scale: {guidance_scale} → {final_guidance_scale}") print(f" - Inference steps: {num_inference_steps} → {final_inference_steps}") print(f" - Width: {width}, Height: {height}") print(f" - Seed: {seed}") print(f" - Eta: {eta}") print(f" - Strength: {strength}") print(f" - Images per prompt: {num_images_per_prompt}") print("🎨 Iniciando generación de imagen con H200...") inference_start = time.time() # Preparar parámetros adicionales para modelos que los soporten additional_params = {} # Agregar parámetros adicionales según el modelo if hasattr(pipe, 'scheduler') and hasattr(pipe.scheduler, 'beta_start'): # Algunos modelos soportan parámetros de scheduler additional_params['eta'] = eta if hasattr(pipe, 'vae') and hasattr(pipe.vae, 'scale_factor'): # Algunos modelos soportan parámetros de VAE additional_params['output_type'] = 'pil' # Configurar safety checker if hasattr(pipe, 'safety_checker') and not safety_checker: pipe.safety_checker = None print("🛡️ Safety checker deshabilitado") # Configurar número de imágenes if num_images_per_prompt > 1: additional_params['num_images_per_prompt'] = num_images_per_prompt # Optimizaciones específicas para H200 if torch.cuda.is_available(): print("🚀 Aplicando optimizaciones específicas para H200...") # Limpiar cache de GPU antes de la inferencia torch.cuda.empty_cache() # Generar la imagen (sin mixed precision para evitar problemas) print("⚡ Generando imagen con H200...") # Generar la imagen result = pipe( prompt=prompt, negative_prompt=negative_prompt, guidance_scale=final_guidance_scale, num_inference_steps=final_inference_steps, width=width, height=height, generator=generator, **additional_params ) # Verificar que la imagen se generó correctamente if hasattr(result, 'images') and len(result.images) > 0: image = result.images[0] # Verificar que la imagen no sea completamente negra if image is not None: # Convertir a numpy para verificar img_array = np.array(image) if img_array.size > 0: # Verificar si la imagen es completamente negra if np.all(img_array == 0) or np.all(img_array < 10): print("⚠️ ADVERTENCIA: Imagen generada es completamente negra") print("🔄 Reintentando con parámetros ajustados...") # Reintentar con parámetros más conservadores result = pipe( prompt=prompt, negative_prompt=negative_prompt, guidance_scale=max(1.0, final_guidance_scale * 0.8), num_inference_steps=max(10, final_inference_steps), width=width, height=height, generator=generator ) image = result.images[0] else: print("✅ Imagen generada correctamente") else: print("❌ Error: Imagen vacía") raise Exception("Imagen vacía generada") else: print("❌ Error: Imagen es None") raise Exception("Imagen es None") else: print("❌ Error: No se generaron imágenes") raise Exception("No se 
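# Note: @spaces.GPU acquires a ZeroGPU worker per call. For slow models a
# longer slot can be requested via the decorator's duration argument, e.g.
# @spaces.GPU(duration=120). Whether that is needed here depends on the
# model and step count, so the default is kept above.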
generaron imágenes") else: # Fallback para CPU result = pipe( prompt=prompt, negative_prompt=negative_prompt, guidance_scale=final_guidance_scale, num_inference_steps=final_inference_steps, width=width, height=height, generator=generator, **additional_params ) image = result.images[0] inference_time = time.time() - inference_start total_time = time.time() - start_time print(f"✅ Imagen generada exitosamente con H200!") print(f"⏱️ Tiempo de inferencia: {inference_time:.2f} segundos") print(f"⏱️ Tiempo total: {total_time:.2f} segundos") print(f"🎲 Seed final: {seed}") if torch.cuda.is_available(): print(f"💾 Memoria GPU utilizada: {torch.cuda.memory_allocated() / 1024**3:.2f} GB") print(f"💾 Memoria GPU libre: {torch.cuda.memory_reserved() / 1024**3:.2f} GB") print(f"🚀 Velocidad H200: {final_inference_steps/inference_time:.1f} steps/segundo") else: print("💾 Memoria CPU") return image, seed except Exception as e: print(f"❌ Error en inferencia: {e}") print(f"🔍 Tipo de error: {type(e).__name__}") print(f"📋 Detalles del error: {str(e)}") # Crear imagen de error error_image = Image.new('RGB', (width, height), color='red') return error_image, seed examples = [ "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k", "An astronaut riding a green horse", "A delicious ceviche cheesecake slice", "Futuristic AI assistant in a glowing galaxy, neon lights, sci-fi style, cinematic", "Portrait of a beautiful woman, realistic, high quality, detailed", "Anime girl with blue hair, detailed, high quality", "Cyberpunk city at night, neon lights, detailed, 8k", "Fantasy landscape with mountains and dragons, epic, detailed", ] css = """ #col-container { margin: 0 auto; max-width: 640px; } """ with gr.Blocks(css=css) as demo: with gr.Column(elem_id="col-container"): gr.Markdown(" # Text-to-Image Gradio Template") with gr.Row(): prompt = gr.Text( label="Prompt", show_label=False, max_lines=1, placeholder="Enter your prompt", container=False, ) run_button = gr.Button("Run", scale=0, variant="primary") result = gr.Image(label="Result", show_label=False) with gr.Accordion("Advanced Settings", open=False): model_selector = gr.Dropdown( choices=list(MODELS.keys()), value=list(MODELS.keys())[0], label="Model", info="Select a high-quality model (FLUX models require HF_TOKEN)" ) negative_prompt = gr.Text( label="Negative prompt", max_lines=2, placeholder="Enter a negative prompt (optional)", visible=True, ) with gr.Row(): seed = gr.Slider( label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0, ) randomize_seed = gr.Checkbox(label="Randomize seed", value=True) with gr.Row(): width = gr.Slider( label="Width", minimum=256, maximum=MAX_IMAGE_SIZE, step=32, value=1024, ) height = gr.Slider( label="Height", minimum=256, maximum=MAX_IMAGE_SIZE, step=32, value=1024, ) with gr.Row(): guidance_scale = gr.Slider( label="Guidance scale", minimum=0.0, maximum=20.0, step=0.1, value=7.5, info="Controls how closely the image follows the prompt (higher = more adherence)" ) num_inference_steps = gr.Slider( label="Number of inference steps", minimum=1, maximum=100, step=1, value=20, info="More steps = higher quality but slower generation" ) with gr.Row(): # Parámetros adicionales para modelos avanzados eta = gr.Slider( label="Eta (DDIM)", minimum=0.0, maximum=1.0, step=0.01, value=0.0, info="DDIM eta parameter (0 = deterministic, 1 = stochastic)" ) strength = gr.Slider( label="Strength", minimum=0.0, maximum=1.0, step=0.01, value=1.0, info="Strength of the transformation (for img2img models)" ) with gr.Row(): # 
Configuraciones de calidad num_images_per_prompt = gr.Slider( label="Images per prompt", minimum=1, maximum=4, step=1, value=1, info="Number of images to generate (may slow down generation)" ) safety_checker = gr.Checkbox( label="Safety checker", value=True, info="Enable content safety filtering" ) # Información del modelo seleccionado model_info = gr.Markdown( value="**Model Info:** Select a model to see its specific configuration recommendations.", label="Model Information" ) gr.Examples(examples=examples, inputs=[prompt]) gr.on( triggers=[run_button.click, prompt.submit], fn=infer, inputs=[ prompt, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps, model_selector, eta, strength, num_images_per_prompt, safety_checker, ], outputs=[result, seed], ) # Actualizar información del modelo cuando se seleccione model_selector.change( fn=get_model_info, inputs=[model_selector], outputs=[model_info] ) if __name__ == "__main__": print("🚀 Iniciando Gradio app...") demo.launch()
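# For local debugging, request queueing and a public link can be enabled
# instead, e.g. demo.queue().launch(share=True); both are standard Gradio
# options and neither is required when running as a Hugging Face Space.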