"""Gradio demo that serves the ERNIE-Image-Turbo text-to-image pipeline on a GPU."""

import io
import os
import time

# MUST COME FIRST: huggingface_hub resolves its cache directories when it is
# first imported, and spaces / gradio / diffusers all import it. Setting these
# variables after those imports (as this script previously did) has no effect.
os.environ["HF_HOME"] = "/tmp/huggingface"
os.environ["HF_HUB_CACHE"] = "/tmp/huggingface/hub"
os.environ["HF_XET_CACHE"] = "/tmp/huggingface/xet"

# NOTE: the relative order of the imports below is kept exactly as in the
# original script (spaces before torch); do not re-sort them.
import spaces  # noqa: E402
import gradio as gr  # noqa: E402
import torch  # noqa: E402
import diffusers  # noqa: E402
from diffusers import ErnieImagePipeline  # noqa: E402

# ------------------------
# CONFIG
# ------------------------
MODEL_PATH = "Baidu/ERNIE-Image-Turbo"
OUTPUT_DIR = "outputs/ernie_image_turbo"

DEFAULT_HEIGHT = 848
DEFAULT_WIDTH = 1264
DEFAULT_STEPS = 8
DEFAULT_GUIDANCE = 1.0
DEFAULT_USE_PE = True

os.makedirs(OUTPUT_DIR, exist_ok=True)

# ------------------------
# GLOBAL LOG BUFFER
# ------------------------
log_buffer = io.StringIO()


def log(msg: str):
    """Print *msg* and append it (newline-terminated) to the global log buffer."""
    print(msg)
    log_buffer.write(str(msg) + "\n")


# Enable diffusers logs
diffusers.utils.logging.set_verbosity_info()

# ------------------------
# LOAD PIPELINE ONCE
# ------------------------
log("Loading ERNIE-Image-Turbo pipeline...")

pipe = ErnieImagePipeline.from_pretrained(
    MODEL_PATH,
    torch_dtype=torch.bfloat16,
)

# Keep same GPU style as your sample
pipe.to("cuda")

log("Pipeline loaded on CUDA.")


# ------------------------
# HELPERS
# ------------------------
def pipeline_debug_info(pipe) -> str:
    """Return a multi-line, human-readable summary of the pipeline components.

    Each diagnostic section is best-effort: a failure while inspecting the VAE
    or the transformer is reported inside the returned text instead of being
    raised.
    """
    info = [
        "=== PIPELINE DEBUG INFO ===",
        f"Pipeline Class: {pipe.__class__.__name__}",
    ]

    try:
        info.append(f"VAE Class: {pipe.vae.__class__.__name__}")
        info.append(
            f"VAE scaling factor: {getattr(pipe.vae.config, 'scaling_factor', 'N/A')}"
        )
        info.append(
            f"VAE latent channels: {getattr(pipe.vae.config, 'latent_channels', 'N/A')}"
        )
    except Exception as e:
        info.append(f"VAE diagnostics failed: {e}")

    try:
        if hasattr(pipe, "transformer") and pipe.transformer is not None:
            info.append(f"Transformer Class: {pipe.transformer.__class__.__name__}")
    except Exception as e:
        info.append(f"Transformer diagnostics failed: {e}")

    return "\n".join(info)


def _vae_spatial_factor(pipe):
    """Return the VAE's pixel-to-latent downsampling factor, or None if unknown.

    Prefers the pipeline's ``vae_scale_factor`` attribute and falls back to
    ``2 ** (len(block_out_channels) - 1)``, the formula diffusers pipelines
    conventionally use. NOTE(review): confirm that ErnieImagePipeline follows
    this convention and does not additionally pack latents into patches.
    """
    factor = getattr(pipe, "vae_scale_factor", None)
    if factor:
        return int(factor)

    block_out_channels = getattr(pipe.vae.config, "block_out_channels", None)
    if block_out_channels:
        return 2 ** (len(block_out_channels) - 1)

    return None


def latent_shape_info(h, w, pipe) -> str:
    """Describe the approximate latent tensor shape for an ``h`` x ``w`` image.

    The spatial size is derived from the VAE downsampling factor. The previous
    implementation divided by ``vae.config.scaling_factor``, which scales
    latent *values* (a float such as 0.18215) and has nothing to do with
    spatial resolution, so it reported meaningless dimensions.

    Never raises: any failure is returned as a descriptive string.
    """
    try:
        factor = _vae_spatial_factor(pipe)
        channels = getattr(pipe.vae.config, "latent_channels", None)

        if not factor or channels is None:
            return "Latent shape info unavailable"

        h_lat = int(h) // factor
        w_lat = int(w) // factor

        return f"Approx latent shape → ({channels}, {h_lat}, {w_lat})"
    except Exception as e:
        return f"Latent shape calc failed: {e}"


def save_images(images, width, height, seed):
    """Save every image as a PNG under ``OUTPUT_DIR`` and return the file paths.

    File names encode the requested size, the seed, a second-resolution
    timestamp and the 1-based index of the image within the batch.
    """
    saved_paths = []
    ts = int(time.time())

    for idx, image in enumerate(images, start=1):
        output_path = os.path.join(
            OUTPUT_DIR,
            f"ernie_{width}x{height}_{seed}_{ts}_{idx}.png",
        )
        image.save(output_path)
        saved_paths.append(output_path)

    return saved_paths


# ------------------------
# IMAGE GENERATOR
# ------------------------
@spaces.GPU
def generate_image(
    prompt,
    height,
    width,
    num_inference_steps,
    guidance_scale,
    seed,
    randomize_seed,
    use_pe,
    num_images,
):
    """Run the pipeline for one UI request.

    The global log buffer is reset at the start of every call, so the returned
    log text covers only this request.

    Returns:
        A tuple ``(images, seed, saved_paths_text, log_text)`` matching the
        four Gradio outputs wired to the Generate button.

    Raises:
        gr.Error: if the prompt is empty, or if the seed field is empty while
            "Randomize Seed" is switched off.
    """
    # truncate() does not move the stream position, hence the explicit seek.
    log_buffer.truncate(0)
    log_buffer.seek(0)

    print(prompt)
    log("=== NEW GENERATION REQUEST ===")

    if not prompt or not str(prompt).strip():
        raise gr.Error("Prompt cannot be empty.")

    prompt = str(prompt).strip()
    height = int(height)
    width = int(width)
    num_inference_steps = int(num_inference_steps)
    guidance_scale = float(guidance_scale)
    # Clamp to the same 1..4 range the UI slider offers.
    num_images = min(max(1, int(num_images)), 4)

    log(f"Height: {height}, Width: {width}")
    log(f"Inference Steps: {num_inference_steps}")
    log(f"Guidance Scale: {guidance_scale}")
    log(f"Use Prompt Enhancer: {use_pe}")
    log(f"Num Images: {num_images}")

    if randomize_seed:
        seed = torch.randint(0, 2**32 - 1, (1,)).item()
        log(f"Randomized Seed → {seed}")
    else:
        # A cleared gr.Number field arrives as None; report that to the user
        # instead of failing with a bare TypeError from int(None).
        if seed is None:
            raise gr.Error("Seed cannot be empty when Randomize Seed is off.")
        seed = int(seed)
        log(f"Seed: {seed}")

    generator = torch.Generator("cuda").manual_seed(seed)

    log("Running pipeline forward()...")

    result = pipe(
        prompt=prompt,
        height=height,
        width=width,
        num_inference_steps=num_inference_steps,
        guidance_scale=guidance_scale,
        use_pe=bool(use_pe),
        generator=generator,
        num_images_per_prompt=num_images,
    )

    images = result.images

    # Diagnostics are informational only and must never fail the request.
    try:
        log(pipeline_debug_info(pipe))
        log(latent_shape_info(height, width, pipe))
    except Exception as e:
        log(f"Diagnostics error: {e}")

    saved_paths = save_images(images, width, height, seed)

    log("Pipeline finished.")
    log("Saved files:")
    for p in saved_paths:
        log(p)

    log("Returning images...")

    return images, seed, "\n".join(saved_paths), log_buffer.getvalue()


# ------------------------
# EXAMPLES
# ------------------------
EXAMPLE_PROMPT = """A highly detailed close-up scene of a middle-aged marine biologist sitting on a weathered dock beside a large sea lion resting partially out of the water. The man wears a faded blue jacket with a stitched name tag reading "Dr. Elias Moreno" and rubber boots marked "Harbor Lab Unit 3" along the sides. A waterproof tablet hangs from a strap across his chest, displaying a screen labeled "Specimen Log v2.4". The sea lion's whiskers glisten with moisture, and a small yellow tag attached to its fin clearly reads "SL-207". Around them, ropes, buoys, and fishing nets are loosely arranged across the dock, including a red buoy labeled "Zone C". A metal bucket filled with fish sits nearby with the printed text "BAIT ONLY", and a clipboard rests on the wood with handwritten notes titled "Feeding Schedule - Morning Session". In the background, wooden posts rise from the water toward a foggy harbor where a distant boat shows the name "North Star" painted along its side. A warning sign nailed to one post reads "CAUTION: WET SURFACE". The lighting is cool and diffused, with subtle highlights on wet surfaces and muted shadows stretching along the dock. Fine details such as fabric creases, water droplets, and the texture of the sea lion's skin are clearly defined."""

examples = [
    [EXAMPLE_PROMPT],
    ["A cinematic portrait of a futuristic astronaut standing in neon rain, hyper detailed, reflective helmet, moody lighting"],
    ["A cozy mountain cabin in winter at sunset, smoke from chimney, warm interior lights, ultra detailed"],
    ["A majestic white tiger walking through a misty forest, volumetric light, sharp fur detail"],
    ["A fantasy castle floating above clouds during golden hour, intricate architecture, epic atmosphere"],
]

# ------------------------
# GRADIO UI
# ------------------------
with gr.Blocks(title="ERNIE-Image-Turbo GPU Demo") as demo:
    gr.Markdown("# 🎨 ERNIE-Image-Turbo — GPU Gradio Demo")

    with gr.Row():
        with gr.Column(scale=1):
            prompt = gr.Textbox(
                label="Prompt",
                lines=10,
                value=EXAMPLE_PROMPT,
            )

            with gr.Row():
                height = gr.Slider(512, 1536, DEFAULT_HEIGHT, step=8, label="Height")
                width = gr.Slider(512, 1536, DEFAULT_WIDTH, step=8, label="Width")

            num_images = gr.Slider(1, 4, 1, step=1, label="Number of Images")

            num_inference_steps = gr.Slider(
                1, 20, DEFAULT_STEPS, step=1, label="Inference Steps"
            )

            guidance_scale = gr.Slider(
                0.0, 10.0, DEFAULT_GUIDANCE, step=0.1, label="Guidance Scale"
            )

            use_pe = gr.Checkbox(
                label="Use Prompt Enhancer",
                value=DEFAULT_USE_PE,
            )

            with gr.Row():
                seed = gr.Number(label="Seed", value=42, precision=0)
                randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)

            generate_btn = gr.Button("🚀 Generate", variant="primary")

        with gr.Column(scale=1):
            output_images = gr.Gallery(
                label="Generated Images",
                preview=True,
                columns=2,
                height="auto",
            )

            used_seed = gr.Number(label="Seed Used", interactive=False)

            saved_paths = gr.Textbox(
                label="Saved File Paths",
                lines=6,
                interactive=False,
            )

            debug_log = gr.Textbox(
                label="Debug Log Output",
                lines=18,
                interactive=False,
            )

    gr.Examples(examples=examples, inputs=[prompt], cache_examples=False)

    generate_btn.click(
        fn=generate_image,
        inputs=[
            prompt,
            height,
            width,
            num_inference_steps,
            guidance_scale,
            seed,
            randomize_seed,
            use_pe,
            num_images,
        ],
        outputs=[output_images, used_seed, saved_paths, debug_log],
    )

if __name__ == "__main__":
    demo.queue().launch(server_name="0.0.0.0", server_port=7860)