# Spaces: Lyon28 / Image-generation (Running)
# File size: 22,401 Bytes
import os
import io
import re
import random
import requests
import numpy as np
import torch
import gradio as gr
from PIL import Image
from openai import OpenAI
from diffusers import (
    StableDiffusionXLPipeline,
    StableDiffusionPipeline,
    EulerAncestralDiscreteScheduler,
    DPMSolverMultistepScheduler,
    KDPM2AncestralDiscreteScheduler,
)

# DEVICE
# Use CUDA whenever torch reports it as available; otherwise run on the CPU.
# Half precision is only selected on the GPU, the CPU path keeps float32.
IS_CPU = not torch.cuda.is_available()
device = torch.device("cpu" if IS_CPU else "cuda")
DTYPE = torch.float32 if IS_CPU else torch.float16
print(f"[INFO] Device: {device} | dtype: {DTYPE}")

#app = gr.mount_gradio_app(
#    FastAPI(),
#    demo,
#    path="/"
#)

# MODELS
# Hub repo id -> label shown in the model dropdown.
# Membership in this table is what makes load_pipeline choose
# StableDiffusionXLPipeline for a model.
SDXL_MODELS = {
    #"Tongyi-MAI/Z-Image-Turbo":                      "Z-Image Turbo ⚡",
    "Heartsync/NSFW-Uncensored":                     "NSFW Uncensored",
    "stabilityai/stable-diffusion-xl-base-1.0":      "SDXL Base 1.0",
    "RunDiffusion/Juggernaut-XL-v9":                 "Juggernaut XL v9",
    "SG161222/RealVisXL_V4.0":                       "RealVisXL V4.0",
    "cagliostrolab/animagine-xl-3.1":                "Animagine XL 3.1",
    "Lykon/dreamshaper-xl-1-0":                      "DreamShaper XL",
    "playgroundai/playground-v2.5-1024px-aesthetic": "Playground v2.5",
    "dataautogpt3/OpenDalleV1.1":                    "OpenDalle V1.1",
    "fluently/Fluently-XL-Final":                    "Fluently XL",
    "Corcelio/mobius":                               "Mobius",
}

# Hub repo id -> label for every model that load_pipeline loads with the
# plain StableDiffusionPipeline (i.e. anything not listed in SDXL_MODELS).
# NOTE(review): Z-Image-Turbo is grouped here and is also the dropdown default;
# confirm that this checkpoint really loads with StableDiffusionPipeline.
SD15_MODELS = {
    "Tongyi-MAI/Z-Image-Turbo":                      "Z-Image Turbo ⚡",
    "runwayml/stable-diffusion-v1-5":               "SD 1.5 (Ringan)",
    "Lykon/dreamshaper-8":                          "DreamShaper 8",
    "stablediffusionapi/realistic-vision-v51":      "Realistic Vision v5.1",
    "stablediffusionapi/anything-v5":               "Anything v5 (Anime)",
    "stablediffusionapi/chilloutmix":               "ChilloutMix",
    "digiplay/AbsoluteReality_v1.8.1":              "AbsoluteReality v1.8",
}

# Combined table used to populate the model dropdown (SDXL entries first).
AVAILABLE_MODELS = {**SDXL_MODELS, **SD15_MODELS}

# Models for which infer_local caps the step count at 8 and guidance at 5.0.
TURBO_MODELS = {
    "Tongyi-MAI/Z-Image-Turbo",
}

# ------------------------------------------------------------
# SCHEDULERS
# ------------------------------------------------------------
# Label shown in the scheduler dropdown -> internal key understood by
# set_scheduler.
SCHEDULERS = {
    "Euler Ancestral": "EulerAncestral",
    "DPM++ 2M Karras": "DPM++2MKarras",
    "KDPM2 Ancestral": "KDPM2Ancestral",
}

def set_scheduler(pipe, key):
    """Replace the scheduler on ``pipe`` with the one selected by ``key``.

    The replacement is built from the config of the scheduler currently
    attached to the pipeline.  ``"DPM++2MKarras"`` additionally enables
    Karras sigmas.  A key that is not recognised leaves the pipeline as it is.

    Returns the same pipeline object that was passed in.
    """
    if key == "DPM++2MKarras":
        # The only variant that needs an extra config override.
        pipe.scheduler = DPMSolverMultistepScheduler.from_config(
            pipe.scheduler.config,
            use_karras_sigmas=True,
        )
        return pipe

    if key == "EulerAncestral":
        scheduler_cls = EulerAncestralDiscreteScheduler
    elif key == "KDPM2Ancestral":
        scheduler_cls = KDPM2AncestralDiscreteScheduler
    else:
        # Unknown key: keep whatever scheduler is already installed.
        return pipe

    pipe.scheduler = scheduler_cls.from_config(pipe.scheduler.config)
    return pipe

# ------------------------------------------------------------
# PIPELINE CACHE
# ------------------------------------------------------------
# model_id -> pipeline that has already been loaded and moved to `device`.
# Entries are never evicted.
loaded_pipelines = {}


def load_pipeline(model_id):
    """Return a ready-to-use diffusion pipeline for ``model_id``.

    Pipelines are cached in ``loaded_pipelines``; a cached entry is returned
    without any further work.  Otherwise the model is loaded with
    ``StableDiffusionXLPipeline`` when ``model_id`` is listed in
    ``SDXL_MODELS`` and with ``StableDiffusionPipeline`` in every other case.

    Loading is attempted with progressively less specific arguments
    (fp16 variant + safetensors, safetensors only, dtype only).  The fp16
    variant attempt is only made off-CPU, so the CPU path does not retry the
    same arguments twice.

    Raises:
        RuntimeError: if every attempt fails; the last underlying error is
            attached as the cause.
    """
    if model_id in loaded_pipelines:
        return loaded_pipelines[model_id]

    print(f"[INFO] Loading: {model_id}")
    is_xl = model_id in SDXL_MODELS
    PipeClass = StableDiffusionXLPipeline if is_xl else StableDiffusionPipeline

    # Most specific arguments first; each entry is distinct from the others.
    attempts = []
    if not IS_CPU:
        attempts.append(
            {"torch_dtype": DTYPE, "variant": "fp16", "use_safetensors": True}
        )
    attempts.append({"torch_dtype": DTYPE, "use_safetensors": True})
    attempts.append({"torch_dtype": DTYPE})

    last_error = None
    for load_kwargs in attempts:
        try:
            pipe = PipeClass.from_pretrained(model_id, **load_kwargs)
            break
        except Exception as e:
            # Best effort: log and fall through to the next, looser attempt.
            print(f"[WARN] Load attempt failed: {e}")
            last_error = e
    else:
        raise RuntimeError(f"Failed to load model: {model_id}") from last_error

    # Default scheduler; set_scheduler may replace it per request.
    pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)
    pipe.to(device)

    if IS_CPU:
        pipe.enable_attention_slicing()

    loaded_pipelines[model_id] = pipe
    print(f"[INFO] Loaded: {model_id}")
    return pipe

# ------------------------------------------------------------
# OPENROUTER
# ------------------------------------------------------------
# Token for the Hugging Face inference endpoint and the chat model used for
# translation / prompt boosting.
HF_TOKEN = os.getenv("HF_TOKEN")
LLM_MODEL = "google/gemini-2.0-flash-001"

# The OpenRouter client is optional: it stays None when no API key is set or
# when constructing the client fails.
or_api_key = os.getenv("OPENROUTER_API_KEY")
or_client = None
if or_api_key:
    try:
        or_client = OpenAI(
            base_url="https://openrouter.ai/api/v1",
            api_key=or_api_key,
        )
    except Exception as e:
        print(f"[ERROR] OpenRouter: {e}")
    else:
        print("[INFO] OpenRouter ready")

# ------------------------------------------------------------
# LANGUAGE
# ------------------------------------------------------------
def is_non_english(text):
    """Tell whether ``text`` contains characters treated as non-English.

    Empty or missing text is never considered non-English.  Note that the
    final pattern matches every non-ASCII character, so accented letters,
    typographic punctuation and emoji also count.
    """
    if not text:
        return False
    script_patterns = (
        r'[\uac00-\ud7a3]',   # Hangul syllables
        r'[\u3040-\u30ff]',   # Hiragana and Katakana
        r'[\u4e00-\u9fff]',   # CJK unified ideographs
        r'[^\x00-\x7F]',      # anything else outside 7-bit ASCII
    )
    return any(re.search(p, text) is not None for p in script_patterns)

def translate_to_english(text):
    """Translate ``text`` to English through the OpenRouter chat model.

    The input is returned unchanged when no OpenRouter client is configured,
    when ``is_non_english`` does not flag the text, when the request fails,
    or when the model returns an empty answer.  Any non-empty answer is
    accepted, so short translations such as "Cat" are kept.
    """
    if not or_client or not is_non_english(text):
        return text
    try:
        resp = or_client.chat.completions.create(
            model=LLM_MODEL,
            messages=[
                {"role": "system", "content": "Translate to English. Only provide translation."},
                {"role": "user",   "content": text},
            ],
            temperature=0.1,
        )
        # Treat a missing message body like an empty answer.
        result = (resp.choices[0].message.content or "").strip()
    except Exception as e:
        # Best effort: translation problems must not block image generation.
        print(f"[ERROR] Translate: {e}")
        return text
    return result or text

# ------------------------------------------------------------
# EXAMPLES & TEMPLATES
# ------------------------------------------------------------
# Pool of ready-made prompts that get_random_prompt draws from.
prompt_examples = [
    "Hyperrealistic portrait of a beautiful woman, studio lighting, 8k photography",
    "Anime girl with long silver hair, school uniform, cherry blossoms, detailed art",
    "A couple sharing a passionate kiss in Paris at night, Eiffel Tower glowing, cinematic",
    "Digital art, fantasy woman warrior, detailed armor, epic lighting, concept art",
    "Mature anime woman, elegant kimono, traditional Japanese setting, high quality",
    "Watercolor illustration, graceful woman, flowing dress, soft pastel colors",
    "Fashion photography, elegant model, dramatic lighting, high fashion, detailed",
    "Two lovers walking on a moonlit beach, waves crashing, intimate moment",
    "Moody anime scene, neon lights, rain, two characters, sensual atmosphere",
    "Oil painting style, figure study, classical art, soft colors, detailed brushwork",
]

# Tab label -> prompts offered under that tab.  The UI creates one button per
# prompt; clicking it copies the full prompt text into the prompt box.
TEMPLATES = {
    "🎭 Romantic": [
        "A couple in a passionate embrace, soft candlelight, romantic atmosphere, detailed",
        "Two lovers walking on a moonlit beach, waves crashing, intimate moment, photorealistic",
        "A woman in elegant evening dress, soft lighting, romantic dinner setting, beautiful",
    ],
    "🌸 Anime": [
        "Anime girl with long silver hair, school uniform, cherry blossoms, detailed anime art",
        "Mature anime woman, elegant kimono, traditional Japanese setting, high quality",
        "Moody anime scene, neon lights, rain, two characters, sensual atmosphere",
    ],
    "📸 Realistic": [
        "Hyperrealistic portrait of a beautiful woman, studio lighting, 8k photography",
        "Cinematic shot, beautiful woman, soft natural light, photorealistic, high detail",
        "Fashion photography, elegant model, dramatic lighting, high fashion, detailed",
    ],
    "🎨 Artistic": [
        "Oil painting style, figure study, classical art, soft colors, detailed brushwork",
        "Digital art, fantasy woman warrior, detailed armor, epic lighting, concept art",
        "Watercolor illustration, graceful woman, flowing dress, soft pastel colors",
    ],
}

# ------------------------------------------------------------
# BOOST
# ------------------------------------------------------------
def boost_prompt(keyword):
    """Expand a short keyword into a full image prompt.

    Returns a fixed hint when ``keyword`` is empty or whitespace only.
    Non-English keywords are translated first.  Without an OpenRouter client
    the keyword is simply suffixed with generic quality tags.  Otherwise the
    chat model is asked for one prompt; a leading "Prompt:" / "Output:" /
    "Result:" label is stripped, a non-English answer is translated, and an
    answer of ten characters or fewer is replaced by a keyword-based fallback.
    If the request fails, the error is logged and a keyword-based fallback is
    returned.
    """
    if not keyword or not keyword.strip():
        return "Please enter a keyword first."
    if is_non_english(keyword):
        keyword = translate_to_english(keyword)
    if not or_client:
        return f"{keyword}, highly detailed, beautiful, masterpiece, best quality, 8k"
    try:
        resp = or_client.chat.completions.create(
            model=LLM_MODEL,
            messages=[
                {"role": "system", "content":
                    "Generate ONE detailed image prompt in English. 1-3 sentences. No prefixes. Just the prompt."},
                {"role": "user", "content": keyword},
            ],
            temperature=0.8,
        )
        result = resp.choices[0].message.content.strip()
        # Models sometimes prepend a label despite the instruction; drop it.
        result = re.sub(r'^(Prompt:|Output:|Result:)\s*', '', result, flags=re.IGNORECASE)
        if is_non_english(result):
            result = translate_to_english(result)
        return result if len(result) > 10 else f"{keyword}, detailed, high quality"
    except Exception as e:
        # Log the failure (as the other handlers in this file do) before
        # degrading to the static fallback.
        print(f"[ERROR] Boost: {e}")
        return f"{keyword}, highly detailed, beautiful, masterpiece"

def get_random_prompt():
    """Return one entry of ``prompt_examples`` picked at random."""
    return random.choice(prompt_examples)

# ------------------------------------------------------------
# HF INFERENCE API
# ------------------------------------------------------------
# Model ids that infer_via_api is willing to send to the hosted endpoint.
# For any other id infer_via_api returns None and infer falls back to local
# inference.
HF_API_SUPPORTED = {
   # "Tongyi-MAI/Z-Image-Turbo",
    "stabilityai/stable-diffusion-xl-base-1.0",
    "runwayml/stable-diffusion-v1-5",
    "Lykon/dreamshaper-8",
    "Lykon/dreamshaper-xl-1-0",
    "cagliostrolab/animagine-xl-3.1",
    "dataautogpt3/OpenDalleV1.1",
}

def infer_via_api(prompt, negative_prompt, model_id, width, height, steps, guidance):
    """Try to generate an image through the hosted Hugging Face endpoint.

    The request is only made when ``HF_TOKEN`` is set and ``model_id`` is in
    ``HF_API_SUPPORTED``.  Width and height are capped at 1024 in the request.

    Returns the decoded PIL image on HTTP 200, and ``None`` in every other
    case (API not usable, non-200 status, or any exception while requesting
    or decoding).
    """
    if not HF_TOKEN:
        return None
    if model_id not in HF_API_SUPPORTED:
        return None

    try:
        print(f"[INFO] HF API: {model_id}")
        endpoint = f"https://api-inference.huggingface.co/models/{model_id}"
        auth_headers = {"Authorization": f"Bearer {HF_TOKEN}"}
        generation_params = {
            "negative_prompt": negative_prompt,
            "width":  min(width, 1024),
            "height": min(height, 1024),
            "num_inference_steps": steps,
            "guidance_scale": guidance,
        }
        response = requests.post(
            endpoint,
            headers=auth_headers,
            json={"inputs": prompt, "parameters": generation_params},
            timeout=120,
        )
        if response.status_code != 200:
            print(f"[WARN] HF API {response.status_code}")
            return None
        print("[INFO] HF API success")
        return Image.open(io.BytesIO(response.content))
    except Exception as e:
        print(f"[ERROR] HF API: {e}")
        return None

# ------------------------------------------------------------
# LOCAL INFERENCE
# ------------------------------------------------------------
def infer_local(prompt, negative_prompt, model_id, seed,
                width, height, guidance, steps, scheduler_key, clip_skip):
    """Generate one image with a locally loaded pipeline.

    The pipeline comes from ``load_pipeline`` and gets the scheduler selected
    by ``scheduler_key``.  For models in ``TURBO_MODELS`` the step count is
    capped at 8 and guidance at 5.0.  ``clip_skip`` is only forwarded when it
    is greater than 1 and the model is listed in ``SDXL_MODELS``.

    Seed, steps, width, height and clip_skip are coerced to ``int`` before
    use, so float values are accepted as well.

    Returns the first generated image, or ``None`` when loading the model or
    running inference fails for any reason (the error is logged).
    """
    try:
        pipe = load_pipeline(model_id)
        pipe = set_scheduler(pipe, scheduler_key)
    except Exception as e:
        print(f"[ERROR] Load model: {e}")
        return None

    # Turbo models: reduce steps (and guidance) automatically
    if model_id in TURBO_MODELS:
        steps = min(steps, 8)
        guidance = min(guidance, 5.0)
        print(f"[INFO] Turbo mode: steps={steps}, guidance={guidance}")

    try:
        # Built inside the try so that an invalid seed or argument degrades to
        # None like every other failure path instead of crashing the handler.
        generator = torch.Generator(device=device).manual_seed(int(seed))
        kwargs = dict(
            prompt=prompt,
            negative_prompt=negative_prompt,
            guidance_scale=guidance,
            num_inference_steps=int(steps),
            width=int(width),
            height=int(height),
            generator=generator,
        )
        # NOTE(review): clip_skip is only applied to SDXL models here; confirm
        # that ignoring it for the other models is intended.
        if clip_skip > 1 and model_id in SDXL_MODELS:
            kwargs["clip_skip"] = int(clip_skip)

        return pipe(**kwargs).images[0]
    except Exception as e:
        print(f"[ERROR] Inference: {e}")
        return None

# ------------------------------------------------------------
# MAIN INFER
# ------------------------------------------------------------
# Largest int32 value: upper bound of the seed slider and of random seeds.
MAX_SEED       = np.iinfo(np.int32).max
# Upper bound of the width and height sliders.
MAX_IMAGE_SIZE = 1216

def infer(model_id, prompt, negative_prompt, seed, randomize_seed,
          width, height, guidance_scale, num_inference_steps,
          scheduler_name, clip_skip):
    """Gradio handler: produce an image for the current UI settings.

    Prompts flagged by ``is_non_english`` are translated first.  A fresh seed
    in ``[0, MAX_SEED]`` is drawn when ``randomize_seed`` is set.  The hosted
    API is tried first, then local inference; if both yield nothing a dark
    512x512 placeholder is returned.

    Note that the seed is only passed to ``infer_local``; the hosted API call
    does not receive it.

    Returns:
        ``(image, seed)`` where ``seed`` is the value that was in effect.
    """
    prompt = translate_to_english(prompt) if is_non_english(prompt) else prompt
    negative_prompt = (
        translate_to_english(negative_prompt)
        if is_non_english(negative_prompt)
        else negative_prompt
    )

    seed = random.randint(0, MAX_SEED) if randomize_seed else seed

    # Unknown dropdown labels fall back to the Euler Ancestral scheduler.
    scheduler_key = SCHEDULERS.get(scheduler_name, "EulerAncestral")

    print(f"[INFO] Model: {model_id} | Seed: {seed}")
    print(f"[INFO] Prompt: {prompt[:80]}")

    # 1. Hosted HF API
    result = infer_via_api(
        prompt,
        negative_prompt,
        model_id,
        width,
        height,
        num_inference_steps,
        guidance_scale,
    )

    # 2. Local fallback
    if result is None:
        print("[INFO] Fallback to local")
        result = infer_local(
            prompt,
            negative_prompt,
            model_id,
            seed,
            width,
            height,
            guidance_scale,
            num_inference_steps,
            scheduler_key,
            clip_skip,
        )

    # 3. Placeholder so the UI always receives an image
    if result is None:
        result = Image.new("RGB", (512, 512), color=(15, 15, 20))

    return result, seed

# ------------------------------------------------------------
# CSS
# ------------------------------------------------------------
# Stylesheet handed to gr.Blocks in the UI section.  The id and class
# selectors below correspond to the elem_id / elem_classes values assigned to
# the components there and to the class names used in the inline HTML.
css = """
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600&display=swap');
* { box-sizing: border-box; margin: 0; padding: 0; }
body { background: #0c0c10; color: #e0dff0; font-family: 'Inter', sans-serif; }
footer { display: none !important; }
#main { max-width: 700px; margin: 0 auto; padding: 24px 16px 48px; }
.hdr { text-align: center; margin-bottom: 24px; padding-bottom: 18px; border-bottom: 1px solid rgba(255,255,255,0.05); }
.hdr h1 { font-size: 1.4rem; font-weight: 600; color: #fff; letter-spacing: -0.02em; }
.hdr h1 span { color: #a78bfa; }
.hdr p { font-size: 0.72rem; color: rgba(255,255,255,0.25); margin-top: 3px; }
.sec { background: rgba(255,255,255,0.025); border: 1px solid rgba(255,255,255,0.055); border-radius: 11px; padding: 13px; margin-bottom: 9px; }
.sec-title { font-size: 0.63rem; font-weight: 600; text-transform: uppercase; letter-spacing: 0.1em; color: rgba(255,255,255,0.2); margin-bottom: 9px; }
.tpl-btn { background: rgba(255,255,255,0.02) !important; border: 1px solid rgba(255,255,255,0.06) !important; color: rgba(255,255,255,0.4) !important; border-radius: 7px !important; font-size: 0.71rem !important; padding: 7px 10px !important; cursor: pointer !important; text-align: left !important; transition: all 0.15s !important; }
.tpl-btn:hover { background: rgba(167,139,250,0.07) !important; border-color: rgba(167,139,250,0.2) !important; color: rgba(255,255,255,0.65) !important; }
#prompt-input textarea { background: rgba(255,255,255,0.04) !important; border: 1px solid rgba(255,255,255,0.08) !important; border-radius: 9px !important; color: #f0eeff !important; font-size: 0.88rem !important; min-height: 80px !important; }
#prompt-input textarea:focus { border-color: rgba(167,139,250,0.4) !important; }
#prompt-input label { display: none !important; }
#boost-btn { background: rgba(251,146,60,0.1) !important; border: 1px solid rgba(251,146,60,0.25) !important; color: #fb923c !important; border-radius: 8px !important; font-size: 0.75rem !important; }
#boost-btn:hover { background: rgba(251,146,60,0.2) !important; }
#random-btn { background: rgba(96,165,250,0.08) !important; border: 1px solid rgba(96,165,250,0.2) !important; color: #60a5fa !important; border-radius: 8px !important; font-size: 0.75rem !important; }
#gen-btn { background: linear-gradient(135deg, #7c3aed, #a78bfa) !important; border: none !important; border-radius: 10px !important; color: #fff !important; font-size: 0.9rem !important; font-weight: 600 !important; padding: 13px !important; box-shadow: 0 4px 18px rgba(124,58,237,0.3) !important; margin-top: 2px !important; }
#gen-btn:hover { transform: translateY(-1px) !important; box-shadow: 0 6px 24px rgba(124,58,237,0.45) !important; }
#out-img { border-radius: 10px !important; border: 1px solid rgba(255,255,255,0.07) !important; margin-top: 10px !important; }
.gr-accordion { background: rgba(255,255,255,0.02) !important; border: 1px solid rgba(255,255,255,0.06) !important; border-radius: 10px !important; margin-top: 10px !important; }
.gr-accordion .label-wrap { color: rgba(255,255,255,0.3) !important; font-size: 0.72rem !important; text-transform: uppercase !important; letter-spacing: 0.1em !important; }
label { color: rgba(255,255,255,0.35) !important; font-size: 0.72rem !important; }
select { background: rgba(255,255,255,0.05) !important; border: 1px solid rgba(255,255,255,0.1) !important; border-radius: 7px !important; color: #e0dff0 !important; }
input[type="checkbox"] { accent-color: #7c3aed !important; }
.cpu-warn { background: rgba(251,146,60,0.07); border: 1px solid rgba(251,146,60,0.18); border-radius: 8px; padding: 8px 12px; font-size: 0.7rem; color: #fb923c; margin-bottom: 10px; }
.div { height: 1px; background: rgba(255,255,255,0.04); margin: 10px 0; }
::-webkit-scrollbar { width: 3px; }
::-webkit-scrollbar-thumb { background: rgba(124,58,237,0.25); border-radius: 4px; }
"""

# ------------------------------------------------------------
# UI
# ------------------------------------------------------------
# Build the whole Gradio interface.  Components are declared top to bottom in
# display order; the event wiring at the end connects them to the functions
# defined above.
with gr.Blocks(css=css, theme=gr.themes.Base()) as demo:

    # Page header; a CPU-mode warning banner is appended only when IS_CPU.
    gr.HTML("""
    <div class="hdr">
        <h1>✦ <span>AI</span> Image Studio</h1>
        <p>Multi-model · CPU friendly · Auto-translate</p>
    </div>
    """ + ("""<div class="cpu-warn">⚠ CPU mode — pakai Z-Image Turbo atau SD 1.5 untuk hasil lebih cepat. HF_TOKEN disarankan untuk API inference.</div>""" if IS_CPU else ""))

    with gr.Column(elem_id="main"):

        # Model
        with gr.Group(elem_classes=["sec"]):
            gr.HTML('<div class="sec-title">Model</div>')
            # Choices are (label, value) pairs: the label is displayed, the
            # repo id is what infer receives.
            model_selector = gr.Dropdown(
                choices=[(v, k) for k, v in AVAILABLE_MODELS.items()],
                value="Tongyi-MAI/Z-Image-Turbo",
                show_label=False,
            )
            gr.HTML("""
            <div style="font-size:0.65rem;color:rgba(255,255,255,0.18);margin-top:5px;">
            ⚡ Z-Image Turbo — paling cepat di CPU &nbsp;·&nbsp;
            SD 1.5 — ringan &nbsp;·&nbsp;
            SDXL — butuh GPU untuk kecepatan wajar
            </div>
            """)

        # Templates
        with gr.Group(elem_classes=["sec"]):
            gr.HTML('<div class="sec-title">Template Prompts</div>')
            # (button, full prompt text) pairs; kept because the button label
            # may be a truncated version of the prompt.
            all_btns = []
            with gr.Tabs():
                for cat, items in TEMPLATES.items():
                    with gr.TabItem(cat):
                        for t in items:
                            # Labels longer than 58 characters are shortened.
                            label = t[:58] + "…" if len(t) > 58 else t
                            b = gr.Button(label, elem_classes=["tpl-btn"], size="sm")
                            all_btns.append((b, t))

        # Boost
        with gr.Group(elem_classes=["sec"]):
            gr.HTML('<div class="sec-title">AI Prompt Boost</div>')
            keyword_input = gr.Text(
                show_label=False,
                max_lines=1,
                placeholder="Keyword / tema dalam bahasa apapun…",
            )
            with gr.Row():
                boost_btn  = gr.Button("✦ Boost",  elem_id="boost-btn")
                random_btn = gr.Button("⟳ Random", elem_id="random-btn")

        # Prompt
        with gr.Group(elem_classes=["sec"]):
            gr.HTML('<div class="sec-title">Prompt</div>')
            prompt = gr.Text(
                show_label=False,
                max_lines=4,
                placeholder="Describe your image… (any language)",
                elem_id="prompt-input",
            )

        # Generate
        gen_btn = gr.Button("Generate Image ↗", elem_id="gen-btn", variant="primary")

        # Output
        output_image = gr.Image(show_label=False, elem_id="out-img", type="pil")

        # Advanced
        # Several defaults below are lower when running on the CPU.
        with gr.Accordion("⚙  Advanced Settings", open=False):
            negative_prompt = gr.Text(
                label="Negative Prompt",
                max_lines=2,
                value="text, watermark, signature, low quality, blurry, deformed, ugly",
            )
            gr.HTML('<div class="div"></div>')
            with gr.Row():
                scheduler  = gr.Dropdown(choices=list(SCHEDULERS.keys()), value="Euler Ancestral", label="Scheduler")
                clip_skip  = gr.Slider(label="CLIP Skip", minimum=1, maximum=4, step=1, value=1)
            gr.HTML('<div class="div"></div>')
            with gr.Row():
                width  = gr.Slider(label="Width",  minimum=256, maximum=MAX_IMAGE_SIZE, step=64, value=512 if IS_CPU else 1024)
                height = gr.Slider(label="Height", minimum=256, maximum=MAX_IMAGE_SIZE, step=64, value=512 if IS_CPU else 1024)
            with gr.Row():
                guidance_scale      = gr.Slider(label="Guidance Scale", minimum=1.0, maximum=20.0, step=0.5, value=5.0 if IS_CPU else 7.0)
                num_inference_steps = gr.Slider(label="Steps", minimum=1, maximum=50, step=1, value=6 if IS_CPU else 20)
            gr.HTML('<div class="div"></div>')
            with gr.Row():
                seed           = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
                randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
            # Read-only field that shows the seed returned by infer.
            seed_display = gr.Number(label="Last Seed", value=0, interactive=False)

    # Events
    boost_btn.click(fn=boost_prompt, inputs=[keyword_input], outputs=[prompt])
    random_btn.click(fn=get_random_prompt, inputs=[], outputs=[prompt])
    for btn, txt in all_btns:
        # The default argument binds this iteration's text to the callback.
        btn.click(fn=lambda t=txt: t, inputs=[], outputs=[prompt])

    # The order of `inputs` must match the parameter order of infer.
    gen_btn.click(
        fn=infer,
        inputs=[model_selector, prompt, negative_prompt, seed, randomize_seed,
                width, height, guidance_scale, num_inference_steps, scheduler, clip_skip],
        outputs=[output_image, seed_display],
    )

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
import uvicorn

# Host the Gradio UI inside a FastAPI application so middleware can be added.
app = FastAPI()

# CORS is left fully open: every origin, method and header is accepted.
# NOTE(review): confirm this is intended for the deployed service.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# Serve the Blocks demo at the site root.
app = gr.mount_gradio_app(app, demo, path="/")

# When executed as a script, listen on all interfaces on port 7860.
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)