Tools

Sleeping

File size: 6,380 Bytes

from __future__ import annotations

import os
import uuid
import random
from typing import Annotated

import gradio as gr
from PIL import Image
from huggingface_hub import InferenceClient
from ._core import ROOT_DIR, get_hf_token, DEFAULT_PROVIDERS, handle_hf_error

from app import _log_call_end, _log_call_start, _truncate_for_log
from ._docstrings import autodoc

# Token obtained from get_hf_token() once, when this module is imported.
# Generate_Image passes it as `api_key` to every InferenceClient it creates.
HF_API_TOKEN = get_hf_token()

# Single source of truth for the LLM-facing tool description.
# Used both as the @autodoc summary of Generate_Image and as the Gradio
# `api_description` in build_interface, so it must describe what the tool
# actually returns: Generate_Image saves a PNG and returns its path (a str),
# not an in-memory PIL.Image.
TOOL_SUMMARY = (
    "Generate an image from a text prompt via Hugging Face serverless inference; "
    "tunable model/steps/guidance/size, supports negative prompt and seed; "
    "the image is saved as a PNG file and its file path is returned. "
    "Return the generated media to the user in this format `![Alt text](URL)`."
)


# Text-to-image tool entry point.
#
# Tries each provider in DEFAULT_PROVIDERS in order; the first one that yields
# an image wins. The image is written to ROOT_DIR as `image_<8 hex chars>.png`
# and the full path of that file is returned.
#
# Raises gr.Error when the prompt is empty or whitespace-only. When every
# provider fails, the last error message is handed to handle_hf_error
# (presumably that helper raises a user-facing error -- confirm in ._core; if
# it ever returns normally, this function falls through and returns None).
#
# NOTE: documentation lives in comments rather than a docstring on purpose:
# the docstring is expected to be managed by @autodoc from TOOL_SUMMARY and the
# Annotated parameter hints (confirm in ._docstrings before adding one).
@autodoc(
    summary=TOOL_SUMMARY,
)
def Generate_Image(
    prompt: Annotated[str, "Text description of the image to generate."],
    model_id: Annotated[str, "Hugging Face model id in the form 'creator/model-name' (e.g., Tongyi-MAI/Z-Image-Turbo)."] = "Tongyi-MAI/Z-Image-Turbo",
    negative_prompt: Annotated[str, "What should NOT appear in the image."] = (
        "(deformed, distorted, disfigured), poorly drawn, bad anatomy, wrong anatomy, extra limb, "
        "missing limb, floating limbs, (mutated hands and fingers), disconnected limbs, mutation, "
        "mutated, ugly, disgusting, blurry, amputation, misspellings, typos"
    ),
    steps: Annotated[int, "Number of denoising steps (1–100). Higher = slower, potentially higher quality."] = 35,
    cfg_scale: Annotated[float, "Classifier-free guidance scale (1–20). Higher = follow the prompt more closely."] = 7.0,
    seed: Annotated[int, "Random seed for reproducibility. Use -1 for a random seed per call."] = -1,
    width: Annotated[int, "Output width in pixels (64–1216, multiple of 32 recommended)."] = 1024,
    height: Annotated[int, "Output height in pixels (64–1216, multiple of 32 recommended)."] = 1024,
    sampler: Annotated[str, "Sampling method label (UI only). Common options: 'DPM++ 2M Karras', 'DPM++ SDE Karras', 'Euler', 'Euler a', 'Heun', 'DDIM'."] = "DPM++ 2M Karras",
) -> str:
    # `sampler` is accepted so the UI can offer it, but it is never forwarded
    # to the inference call below.
    _log_call_start(
        "Generate_Image",
        prompt=_truncate_for_log(prompt, 200),
        model_id=model_id,
        steps=steps,
        cfg_scale=cfg_scale,
        seed=seed,
        size=f"{width}x{height}",
    )

    # Guard clause: reject None, "" and whitespace-only prompts up front.
    if not prompt or not prompt.strip():
        _log_call_end("Generate_Image", "error=empty prompt")
        raise gr.Error("Please provide a non-empty prompt.")

    # A fixed quality suffix is appended to every prompt before it is sent.
    enhanced_prompt = f"{prompt} | ultra detail, ultra elaboration, ultra quality, perfect."

    # Resolve the seed ONCE per call. Drawing it inside the provider loop gave
    # each fallback attempt a different seed, and the value actually used was
    # never recorded, so a "-1" result could not be reproduced afterwards.
    resolved_seed = seed if seed != -1 else random.randint(1, 1_000_000_000)

    last_error: Exception | None = None
    for provider in DEFAULT_PROVIDERS:
        try:
            client = InferenceClient(api_key=HF_API_TOKEN, provider=provider)
            image = client.text_to_image(
                prompt=enhanced_prompt,
                negative_prompt=negative_prompt,
                model=model_id,
                width=width,
                height=height,
                num_inference_steps=steps,
                guidance_scale=cfg_scale,
                seed=resolved_seed,
            )

            # Short random suffix keeps concurrent calls from overwriting
            # each other's output file.
            filename = f"image_{uuid.uuid4().hex[:8]}.png"
            output_path = os.path.join(ROOT_DIR, filename)
            image.save(output_path)

            _log_call_end(
                "Generate_Image",
                f"provider={provider} size={image.size} seed={resolved_seed} saved_to={filename}",
            )
            return output_path
        except Exception as exc:
            # Deliberate best-effort fallback: any failure with this provider
            # (including a failed save) moves on to the next one; only the
            # most recent error is kept for reporting.
            last_error = exc
            continue

    # Every provider failed, or DEFAULT_PROVIDERS was empty ("Unknown error").
    msg = str(last_error) if last_error else "Unknown error"
    _log_call_end("Generate_Image", f"error={_truncate_for_log(msg, 200)}")
    handle_hf_error(msg, model_id, context="Image generation")


def build_interface() -> gr.Interface:
    """Build the Gradio form that drives ``Generate_Image``.

    The input components are created in the same order as the positional
    parameters of ``Generate_Image`` (prompt, model, negative prompt, steps,
    CFG scale, seed, width, height, sampler), and the result is shown in a
    single image component. Flagging is disabled and ``TOOL_SUMMARY`` is used
    as the API description.

    Returns:
        The configured ``gr.Interface``.
    """
    prompt_box = gr.Textbox(
        label="Prompt",
        placeholder="Enter a prompt",
        lines=2,
        info="Text description of the image to generate",
    )

    # The info text is an HTML link to the Hub's text-to-image model listing.
    browse_models_link = "<a href=\"https://huggingface.co/models?pipeline_tag=text-to-image&inference_provider=nebius,cerebras,novita,fireworks-ai,together,fal-ai,groq,featherless-ai,nscale,hyperbolic,sambanova,cohere,replicate,scaleway,publicai,hf-inference&sort=trending\" target=\"_blank\" rel=\"noopener noreferrer\">Browse models</a>"
    model_box = gr.Textbox(
        label="Model",
        value="Tongyi-MAI/Z-Image-Turbo",
        placeholder="creator/model-name",
        max_lines=1,
        info=browse_models_link,
    )

    default_negative_prompt = (
        "(deformed, distorted, disfigured), poorly drawn, bad anatomy, wrong anatomy, extra limb, "
        "missing limb, floating limbs, (mutated hands and fingers), disconnected limbs, mutation, "
        "mutated, ugly, disgusting, blurry, amputation, misspellings, typos"
    )
    negative_box = gr.Textbox(
        label="Negative Prompt",
        value=default_negative_prompt,
        lines=2,
        info="What should NOT appear in the image",
    )

    steps_slider = gr.Slider(
        minimum=1,
        maximum=100,
        value=35,
        step=1,
        label="Steps",
        info="Number of denoising steps (1–100)",
    )
    cfg_slider = gr.Slider(
        minimum=1.0,
        maximum=20.0,
        value=7.0,
        step=0.1,
        label="CFG Scale",
        info="Classifier-free guidance scale (1–20)",
    )
    seed_slider = gr.Slider(
        minimum=-1,
        maximum=1_000_000_000,
        value=-1,
        step=1,
        label="Seed (-1 = random)",
        info="Random seed for reproducibility",
    )

    # Width and height share the same range, default and step.
    width_slider = gr.Slider(
        minimum=64,
        maximum=1216,
        value=1024,
        step=32,
        label="Width",
        info="Output width in pixels",
    )
    height_slider = gr.Slider(
        minimum=64,
        maximum=1216,
        value=1024,
        step=32,
        label="Height",
        info="Output height in pixels",
    )

    sampler_radio = gr.Radio(
        label="Sampler",
        value="DPM++ 2M Karras",
        choices=["DPM++ 2M Karras", "DPM++ SDE Karras", "Euler", "Euler a", "Heun", "DDIM"],
        info="Sampling method",
    )

    form_inputs = [
        prompt_box,
        model_box,
        negative_box,
        steps_slider,
        cfg_slider,
        seed_slider,
        width_slider,
        height_slider,
        sampler_radio,
    ]
    header_html = (
        "<div style=\"text-align:center\">Generate images via Hugging Face serverless inference. "
        "Default model is Z-Image-Turbo.</div>"
    )

    return gr.Interface(
        fn=Generate_Image,
        inputs=form_inputs,
        outputs=gr.Image(label="Generated Image"),
        title="Generate Image",
        description=header_html,
        api_description=TOOL_SUMMARY,
        flagging_mode="never",
    )


# Names exported by `from <module> import *`: the tool function and the
# factory that builds its Gradio interface.
__all__ = ["Generate_Image", "build_interface"]