|
|
"""
|
|
|
Z-Image-Turbo GGUF API - Using stable-diffusion-cpp-python
Optimized for CPU inference with quantized models
|
|
|
"""
|
|
|
import base64
import gc
import io
import os
import random
import traceback
from pathlib import Path

import uvicorn
from fastapi import FastAPI, HTTPException
from fastapi.responses import HTMLResponse
from huggingface_hub import hf_hub_download
from PIL import Image
from pydantic import BaseModel
|
|
|
|
|
|
# FastAPI application; the route decorators further down register handlers on it.
app = FastAPI(title="Z-Image-Turbo GGUF API")

# Lazily created StableDiffusion instance; stays None until load_model() runs.
sd_model = None
# Directory in which download_models() stores the model files.
MODELS_DIR = Path("/tmp/models")
|
|
|
|
|
|
class GenerateRequest(BaseModel):
    """Request body accepted by POST /generate."""

    # Text prompt handed to the model.
    prompt: str
    # Requested output size in pixels; the /generate handler clamps each
    # dimension to the 256-1024 range.
    width: int = 512
    height: int = 512
    # -1 asks the handler to pick a random seed.
    seed: int = -1
    # Forwarded to the model as ``sample_steps``.
    num_steps: int = 8
|
|
|
|
|
|
class GenerateResponse(BaseModel):
    """Response body returned by POST /generate."""

    # PNG-encoded image, base64-encoded as text.
    image_base64: str
    # Seed used for this generation (the randomly chosen one when the
    # request sent -1).
    seed: int
    # The handler sets this to "success".
    status: str
|
|
|
|
|
|
def download_models():
    """Make sure every model file is present in MODELS_DIR, downloading if needed.

    Each missing file is fetched from its HuggingFace repository with
    ``hf_hub_download`` and then renamed to a short, fixed local name so that
    later calls can detect it and skip the download.

    Returns:
        dict: maps "diffusion", "llm" and "vae" to a dict with the keys
        "repo", "file" and "local" (a ``Path`` to the file on disk).

    Raises:
        FileNotFoundError: if a download returns but no file exists at the
            path reported by ``hf_hub_download``.
    """
    MODELS_DIR.mkdir(parents=True, exist_ok=True)

    models = {
        "diffusion": {
            "repo": "leejet/Z-Image-Turbo-GGUF",
            "file": "z_image_turbo-Q4_0.gguf",
            "local": MODELS_DIR / "z_image_turbo.gguf",
        },
        "llm": {
            "repo": "unsloth/Qwen3-4B-Instruct-2507-GGUF",
            "file": "Qwen3-4B-Instruct-2507-Q4_K_M.gguf",
            "local": MODELS_DIR / "qwen3_4b.gguf",
        },
        "vae": {
            "repo": "Comfy-Org/z_image_turbo",
            "file": "split_files/vae/ae.safetensors",
            "local": MODELS_DIR / "ae.safetensors",
        },
    }

    for name, model in models.items():
        target = model["local"]
        if target.exists():
            print(f"{name} already exists")
            continue

        print(f"Downloading {name} model...")
        # Trust the path reported by the hub client instead of assuming the
        # file lands at MODELS_DIR / filename; a wrong guess used to skip the
        # rename silently and still print a success message.
        # NOTE(review): local_dir_use_symlinks is kept for older
        # huggingface_hub releases; newer ones deprecate it - confirm against
        # the pinned version.
        downloaded = Path(
            hf_hub_download(
                repo_id=model["repo"],
                filename=model["file"],
                local_dir=MODELS_DIR,
                local_dir_use_symlinks=False,
            )
        )
        if not downloaded.exists():
            raise FileNotFoundError(
                f"{name} model was not found at {downloaded} after download"
            )

        downloaded.rename(target)
        print(f"{name} downloaded!")

    return models
|
|
|
|
|
|
def load_model():
    """Return the shared StableDiffusion instance, building it on first use.

    The first call downloads any missing model files via ``download_models``
    and constructs the model; later calls return the cached instance stored
    in the module-level ``sd_model``.
    """
    global sd_model

    # Fast path: the model has already been built by an earlier call.
    if sd_model is not None:
        return sd_model

    print("Loading Z-Image-Turbo GGUF model...")
    # Imported here so the module can be imported without loading the backend.
    from stable_diffusion_cpp import StableDiffusion

    paths = {key: str(entry["local"]) for key, entry in download_models().items()}

    sd_model = StableDiffusion(
        diffusion_model_path=paths["diffusion"],
        llm_path=paths["llm"],
        vae_path=paths["vae"],
        offload_params_to_cpu=True,
        diffusion_flash_attn=True,
    )
    print("Model loaded!")
    return sd_model
|
|
|
|
|
|
@app.get("/", response_class=HTMLResponse)
async def root():
    """Serve the single-page HTML form that posts prompts to /generate.

    The page's script sends the form values as JSON to ``/generate`` and shows
    either the returned base64 PNG or an error message. Error details that
    are not plain strings (for example FastAPI validation errors, whose
    ``detail`` is a list) are JSON-stringified so they stay readable.
    """
    return """
    <!DOCTYPE html>
    <html>
    <head>
        <title>Z-Image-Turbo GGUF API</title>
        <style>
            * { box-sizing: border-box; }
            body {
                font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
                background: linear-gradient(135deg, #0f0c29 0%, #302b63 50%, #24243e 100%);
                color: white;
                min-height: 100vh;
                margin: 0;
                padding: 20px;
            }
            .container { max-width: 800px; margin: 0 auto; }
            h1 { text-align: center; font-size: 2.5em; margin-bottom: 10px; }
            .badge {
                display: inline-block;
                background: linear-gradient(135deg, #11998e 0%, #38ef7d 100%);
                padding: 4px 12px;
                border-radius: 20px;
                font-size: 12px;
                font-weight: bold;
            }
            .subtitle { text-align: center; opacity: 0.7; margin-bottom: 30px; }
            .form-group { margin-bottom: 20px; }
            label { display: block; margin-bottom: 8px; font-weight: 500; }
            input, textarea {
                width: 100%;
                padding: 12px;
                border: none;
                border-radius: 8px;
                background: rgba(255,255,255,0.1);
                color: white;
                font-size: 16px;
            }
            textarea { min-height: 100px; resize: vertical; }
            input:focus, textarea:focus { outline: 2px solid #38ef7d; }
            button {
                width: 100%;
                padding: 15px;
                background: linear-gradient(135deg, #11998e 0%, #38ef7d 100%);
                border: none;
                border-radius: 8px;
                color: white;
                font-size: 18px;
                font-weight: 600;
                cursor: pointer;
                transition: transform 0.2s;
            }
            button:hover { transform: scale(1.02); }
            button:disabled { opacity: 0.5; cursor: not-allowed; }
            .result {
                margin-top: 30px;
                text-align: center;
                padding: 20px;
                background: rgba(255,255,255,0.05);
                border-radius: 12px;
            }
            .result img { max-width: 100%; border-radius: 8px; }
            .warning {
                background: rgba(17,153,142,0.3);
                padding: 15px;
                border-radius: 8px;
                margin-bottom: 20px;
                border-left: 4px solid #38ef7d;
            }
            .row { display: flex; gap: 15px; }
            .row .form-group { flex: 1; }
            #status { margin-top: 15px; font-style: italic; opacity: 0.8; }
        </style>
    </head>
    <body>
        <div class="container">
            <h1>🎨 Z-Image-Turbo API</h1>
            <p class="subtitle">
                <span class="badge">GGUF Quantized</span>
                Generate images from text using AI - Optimized for CPU
            </p>

            <div class="warning">
                ⚡ <strong>GGUF Quantized Model</strong> - Faster and lighter than full model. First run downloads ~6GB of models.
            </div>

            <div class="form-group">
                <label>Prompt</label>
                <textarea id="prompt" placeholder="A cinematic photograph of a solitary hooded figure walking through a rain-slicked metropolis at night..."></textarea>
            </div>

            <div class="row">
                <div class="form-group">
                    <label>Width</label>
                    <input type="number" id="width" value="512" min="256" max="1024" step="64">
                </div>
                <div class="form-group">
                    <label>Height</label>
                    <input type="number" id="height" value="512" min="256" max="1024" step="64">
                </div>
                <div class="form-group">
                    <label>Seed (-1 = random)</label>
                    <input type="number" id="seed" value="-1">
                </div>
            </div>

            <button id="generateBtn" onclick="generate()">🚀 Generate Image</button>
            <p id="status"></p>

            <div class="result" id="result" style="display:none;">
                <img id="resultImg" src="" alt="Generated image">
                <p id="resultInfo"></p>
            </div>
        </div>

        <script>
            async function generate() {
                const btn = document.getElementById('generateBtn');
                const status = document.getElementById('status');
                const result = document.getElementById('result');

                btn.disabled = true;
                status.textContent = 'Generating... (First run may take longer to load models)';
                result.style.display = 'none';

                try {
                    const response = await fetch('/generate', {
                        method: 'POST',
                        headers: {'Content-Type': 'application/json'},
                        body: JSON.stringify({
                            prompt: document.getElementById('prompt').value,
                            width: parseInt(document.getElementById('width').value, 10),
                            height: parseInt(document.getElementById('height').value, 10),
                            seed: parseInt(document.getElementById('seed').value, 10),
                            num_steps: 8
                        })
                    });

                    const data = await response.json();

                    if (response.ok) {
                        document.getElementById('resultImg').src = 'data:image/png;base64,' + data.image_base64;
                        document.getElementById('resultInfo').textContent = '✅ Seed: ' + data.seed;
                        result.style.display = 'block';
                        status.textContent = '';
                    } else {
                        // Validation errors carry a list in "detail"; stringify it
                        // so the user does not see "[object Object]".
                        const detail = typeof data.detail === 'string'
                            ? data.detail
                            : JSON.stringify(data.detail);
                        status.textContent = '❌ Error: ' + (detail || 'Unknown error');
                    }
                } catch (e) {
                    status.textContent = '❌ Error: ' + e.message;
                } finally {
                    btn.disabled = false;
                }
            }
        </script>
    </body>
    </html>
    """
|
|
|
|
|
|
@app.post("/generate", response_model=GenerateResponse)
async def generate(request: GenerateRequest):
    """Generate an image from a text prompt using the GGUF model.

    Args:
        request: prompt, requested size, seed and step count. Width and
            height are clamped to 256-1024; a negative seed is replaced by a
            random one.

    Returns:
        GenerateResponse: the PNG image as base64, the seed actually used,
        and ``status="success"``.

    Raises:
        HTTPException: status 500 with the error text as ``detail`` when
            loading the model, generating, or encoding the image fails.
    """
    try:
        # NOTE(review): load_model() and generate_image() are synchronous
        # calls made from an async handler; presumably other requests wait
        # while they run - confirm this is acceptable for the deployment.
        model = load_model()

        seed = request.seed
        # Pick the seed here for every negative value, not just -1, so the
        # seed reported back is the one that was actually used.
        # NOTE(review): assumes the backend treats negative seeds as
        # "random" - confirm against the stable-diffusion-cpp-python docs.
        if seed < 0:
            seed = random.randint(0, 2147483647)

        width = min(max(request.width, 256), 1024)
        height = min(max(request.height, 256), 1024)

        print(f"Generating: '{request.prompt[:50]}...' at {width}x{height}, seed={seed}")

        output = model.generate_image(
            prompt=request.prompt,
            width=width,
            height=height,
            cfg_scale=1.0,
            sample_steps=request.num_steps,
            seed=seed,
        )

        print(f"Output type: {type(output)}")

        # The backend may hand back a list of images or a single item.
        if isinstance(output, list):
            if not output:
                raise RuntimeError("Model returned no images")
            img_data = output[0]
        else:
            img_data = output

        image = _to_pil_image(img_data, width, height)

        buffer = io.BytesIO()
        image.save(buffer, format="PNG")
        image_base64 = base64.b64encode(buffer.getvalue()).decode()

        gc.collect()

        return GenerateResponse(
            image_base64=image_base64,
            seed=seed,
            status="success",
        )

    except Exception as e:
        print(f"Error: {e}")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e)) from e


def _to_pil_image(img_data, width, height):
    """Convert one item of model output into a PIL image.

    Args:
        img_data: a PIL image, encoded image bytes, an object exposing a raw
            ``data`` buffer, an array-like exposing ``tobytes``, or anything
            ``Image.fromarray`` accepts.
        width: width used to interpret a raw ``data`` buffer.
        height: height used to interpret a raw ``data`` buffer.

    Returns:
        PIL.Image.Image: the decoded image.
    """
    # PIL images also expose ``tobytes``, so they must be recognised before
    # the attribute checks below; otherwise they are needlessly round-tripped
    # through numpy and this branch is never reached.
    if isinstance(img_data, Image.Image):
        return img_data

    if isinstance(img_data, bytes):
        return Image.open(io.BytesIO(img_data))

    if hasattr(img_data, 'data'):
        # NOTE(review): assumes packed RGB of exactly width x height - confirm
        # against the backend's raw output format.
        return Image.frombytes('RGB', (width, height), img_data.data)

    if hasattr(img_data, 'tobytes'):
        # Imported lazily: only this branch needs numpy.
        import numpy as np

        return Image.fromarray(np.array(img_data).astype('uint8'))

    return Image.fromarray(img_data)
|
|
|
|
|
|
@app.get("/health")
async def health():
    """Report that the service is up and name the model it serves."""
    payload = dict(status="ok", model="Z-Image-Turbo-GGUF")
    return payload
|
|
|
|
|
|
if __name__ == "__main__":
    # Listen on all interfaces; the port comes from the PORT environment
    # variable when set, otherwise 7860.
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=int(os.getenv("PORT", 7860)),
    )
|
|
|
|