""" Z-Image-Turbo GGUF API - Using stable-diffusion-cpp-python Optimized for CPU inference with quantized models """ import os import io import base64 import random import gc from pathlib import Path from PIL import Image from fastapi import FastAPI, HTTPException from fastapi.responses import HTMLResponse from pydantic import BaseModel import uvicorn from huggingface_hub import hf_hub_download app = FastAPI(title="Z-Image-Turbo GGUF API") # Global model sd_model = None MODELS_DIR = Path("/tmp/models") class GenerateRequest(BaseModel): prompt: str width: int = 512 height: int = 512 seed: int = -1 num_steps: int = 8 class GenerateResponse(BaseModel): image_base64: str seed: int status: str def download_models(): """Download GGUF models from HuggingFace""" MODELS_DIR.mkdir(parents=True, exist_ok=True) models = { "diffusion": { "repo": "leejet/Z-Image-Turbo-GGUF", "file": "z_image_turbo-Q4_0.gguf", # Q4 for balance of speed/quality "local": MODELS_DIR / "z_image_turbo.gguf" }, "llm": { "repo": "unsloth/Qwen3-4B-Instruct-2507-GGUF", "file": "Qwen3-4B-Instruct-2507-Q4_K_M.gguf", "local": MODELS_DIR / "qwen3_4b.gguf" }, "vae": { "repo": "Comfy-Org/z_image_turbo", # Z-Image VAE (same as FLUX) "file": "split_files/vae/ae.safetensors", "local": MODELS_DIR / "ae.safetensors" } } for name, model in models.items(): if not model["local"].exists(): print(f"Downloading {name} model...") hf_hub_download( repo_id=model["repo"], filename=model["file"], local_dir=MODELS_DIR, local_dir_use_symlinks=False ) # Rename to expected name downloaded = MODELS_DIR / model["file"] if downloaded.exists(): downloaded.rename(model["local"]) print(f"{name} downloaded!") else: print(f"{name} already exists") return models def load_model(): """Load the Z-Image GGUF model""" global sd_model if sd_model is None: print("Loading Z-Image-Turbo GGUF model...") from stable_diffusion_cpp import StableDiffusion models = download_models() sd_model = StableDiffusion( diffusion_model_path=str(models["diffusion"]["local"]), llm_path=str(models["llm"]["local"]), vae_path=str(models["vae"]["local"]), offload_params_to_cpu=True, diffusion_flash_attn=True, ) print("Model loaded!") return sd_model @app.get("/", response_class=HTMLResponse) async def root(): """Simple HTML interface""" return """
GGUF Quantized Generate images from text using AI - Optimized for CPU