# ImageGen / app.py
# tomiconic's picture
# Update app.py
# 1c80f45 verified
import gradio as gr
import torch
import spaces
from diffusers import StableDiffusionXLPipeline, DPMSolverMultistepScheduler
from huggingface_hub import hf_hub_download, InferenceClient
import random
import os
import re
# ── Config ──────────────────────────────────────────────────────────────
# Hugging Face access token read from the environment; None when HF_TOKEN is unset.
HF_TOKEN = os.getenv("HF_TOKEN")
# Hub repository the SDXL pipeline is loaded from.
MODEL_REPO = "John6666/nova-3dcg-xl-illustrious-v40-sdxl"

# Quality tags for Illustrious-based models.
# Both end with ", " so further tags can be appended by plain concatenation.
IL_POS = "masterpiece, best quality, very aesthetic, absurdres, "
IL_NEG = "worst quality, low quality, bad quality, ugly, "
# ── LLM client ──────────────────────────────────────────────────────────
# huggingface_hub client that expand_prompt_llm() uses for chat completions.
llm_client = InferenceClient(token=HF_TOKEN, model="mistralai/Mistral-7B-Instruct-v0.3")
# System prompt sent with every expansion request in expand_prompt_llm().
# The em dashes were previously mis-decoded (mojibake) and reached the LLM as
# garbage characters; they are restored here.
EXPANSION_SYSTEM = """You are an expert Stable Diffusion prompt engineer specialising in 3DCG character art and illustration.
Your job: take a short user description and rewrite it as a detailed, accurate image generation prompt optimised for a 3D CGI character art model (Nova 3DCG XL).
Rules:
- PRESERVE every specific detail — colours, numbers, states, accessories, clothing
- Wrap unique specific details in attention weights e.g. (red scarf:1.4), (one eye closed:1.3)
- Add: character pose, expression, lighting, background atmosphere, material quality, render style
- Add 3DCG-appropriate quality boosters: sharp edges, subsurface scattering, ray tracing, ambient occlusion
- Do NOT add NSFW content
- Do NOT invent things not implied by the user
- Return ONLY the final prompt — no explanation, no preamble, no quotes
- Keep under 130 words
- Use comma-separated tags and phrases"""
def _clean_expansion(text):
    """Strip wrapping quotes and a leading "prompt:" label from an LLM reply."""
    cleaned = (text or "").strip().strip("\"'").strip()
    if cleaned.lower().startswith("prompt:"):
        # Quotes may sit inside the label (`Prompt: "..."`) as well as around
        # the whole reply, so strip them again after removing the label.
        cleaned = cleaned[len("prompt:"):].strip().strip("\"'").strip()
    return cleaned


def expand_prompt_llm(raw_prompt, style):
    """Expand a short user description into a detailed generation prompt.

    Sends EXPANSION_SYSTEM plus the user's text to ``llm_client`` and returns
    the cleaned reply. Expansion is best-effort: any failure is logged and the
    stripped user prompt is returned instead.

    Args:
        raw_prompt: Text typed by the user. None or blank input yields "".
        style: Selected style name; anything other than "Auto" is passed to
            the LLM as a style hint.

    Returns:
        The expanded prompt, or the stripped ``raw_prompt`` when the LLM call
        fails or produces an empty reply; "" for blank input.
    """
    prompt_text = (raw_prompt or "").strip()
    if not prompt_text:
        return ""

    style_hint = f" The desired style is: {style}." if style != "Auto" else ""
    user_msg = f"Expand this into a detailed 3DCG character art prompt:{style_hint}\n\n{prompt_text}"

    try:
        response = llm_client.chat_completion(
            messages=[
                {"role": "system", "content": EXPANSION_SYSTEM},
                {"role": "user", "content": user_msg},
            ],
            max_tokens=220,
            temperature=0.7,
        )
        expanded = _clean_expansion(response.choices[0].message.content)
    except Exception as e:
        # Deliberately broad: image generation should still work when the
        # LLM backend is unavailable.
        print(f"LLM expansion failed: {e}")
        return prompt_text

    # An empty reply would silently drop the user's description; keep it.
    return expanded or prompt_text
# ── Load model ──────────────────────────────────────────────────────────
# Runs once at import time: builds the SDXL pipeline from MODEL_REPO in float16.
# The pipeline is not moved to a device here; generate() calls pipe.to("cuda")
# per request.
print(f"Loading Nova 3DCG XL from {MODEL_REPO}...")
pipe = StableDiffusionXLPipeline.from_pretrained(
    MODEL_REPO,
    torch_dtype=torch.float16,
    token=HF_TOKEN,
)
# Swap the checkpoint's scheduler for DPM-Solver multistep, reusing the
# existing scheduler config and enabling Karras sigmas.
pipe.scheduler = DPMSolverMultistepScheduler.from_config(
    pipe.scheduler.config,
    use_karras_sigmas=True,
)
# NOTE(review): presumably enabled to lower peak memory during attention — confirm it is still wanted.
pipe.enable_attention_slicing()
print("Pipeline ready.")
# ── Style presets ───────────────────────────────────────────────────────
# Maps the label shown in the style picker to tag fragments that generate()
# inserts into the positive ("pos") and negative ("neg") prompts. The label is
# also passed to the LLM as a style hint, so the emoji must be real characters
# (they were previously mis-decoded mojibake). Fragments end with ", " so they
# concatenate cleanly.
STYLES = {
    "Auto": {"pos": "", "neg": ""},
    "🎮 3DCG Render": {
        "pos": "3DCG render, Pixar style, ray tracing, subsurface scattering, ambient occlusion, sharp edges, studio lighting, ",
        "neg": "flat, 2D, anime flat colour, sketch, ",
    },
    "⚔️ Fantasy": {
        "pos": "fantasy character, epic armour, magical atmosphere, dramatic lighting, volumetric fog, concept art, artstation, ",
        "neg": "modern, mundane, sci-fi, ",
    },
    "🤖 Sci-Fi": {
        "pos": "sci-fi character, futuristic suit, neon accents, holographic elements, dark background, cinematic, ",
        "neg": "medieval, fantasy, nature, ",
    },
    "🌸 Stylised": {
        "pos": "stylised illustration, vibrant colours, soft cel shading, clean lineart, anime-adjacent, ",
        "neg": "photorealistic, gritty, dark, ",
    },
    "🎬 Cinematic": {
        "pos": "cinematic portrait, dramatic rim lighting, shallow depth of field, film grain, color graded, ",
        "neg": "flat, overexposed, sketch, ",
    },
    "🏙️ Urban": {
        "pos": "urban streetwear character, city background, neon lights, night scene, realistic clothing, ",
        "neg": "fantasy, medieval, nature, ",
    },
}
# ── LoRAs ───────────────────────────────────────────────────────────────
# Maps the label shown in the LoRA dropdown to the Hub file generate()
# downloads; "None" means no LoRA. Emoji in the labels were previously
# mis-decoded mojibake and are restored here.
# NOTE(review): "strength" is not read by the code visible in this file —
# generate() takes the scale from its lora_strength argument instead.
LORAS = {
    "None": None,
    "✋ Better Hands": {
        "repo": "WolfAether21/PONY-DIFFUSION-SDXL-LORA",
        "file": "Perfect Hands v2.safetensors",
        "strength": 0.7,
    },
    "🔍 More Detail": {
        "repo": "WolfAether21/PONY-DIFFUSION-SDXL-LORA",
        "file": "SDXL Detail.safetensors",
        "strength": 0.6,
    },
}
# ── Generation ──────────────────────────────────────────────────────────
@spaces.GPU(duration=180)
def generate(raw_prompt, negative_prompt, style, lora_name, lora_strength,
             width, height, steps, guidance, seed, randomize, show_expanded):
    """Generate one image from the user's description.

    The prompt is expanded through expand_prompt_llm(), wrapped with the
    Illustrious quality tags and the selected style's tags, and rendered with
    the module-level ``pipe``. An optional LoRA is fused for the duration of
    the call only.

    Args:
        raw_prompt: User description; must not be blank.
        negative_prompt: Extra negative tags appended after the built-in ones.
        style: Key into STYLES; unknown keys fall back to "Auto".
        lora_name: Key into LORAS; "None" or an unknown key means no LoRA.
        lora_strength: Scale passed to ``fuse_lora``.
        width, height, steps, guidance: Sampling parameters, coerced to
            int/int/int/float.
        seed: Seed to use when ``randomize`` is false.
        randomize: When true, a fresh random seed replaces ``seed``.
        show_expanded: When true, the final positive prompt is returned as
            Markdown for display.

    Returns:
        Tuple ``(image, seed_used, debug_markdown)``.

    Raises:
        gr.Error: If ``raw_prompt`` is blank.
    """
    if not (raw_prompt or "").strip():
        raise gr.Error("Please enter a prompt.")

    # A cleared Number field arrives as None; treat that like "random seed"
    # instead of crashing in int().
    if randomize or seed is None:
        seed = random.randint(0, 2**32 - 1)
    seed = int(seed)

    # LLM expansion
    # NOTE(review): this network call runs inside the @spaces.GPU-decorated
    # function — presumably it counts against the GPU window; confirm.
    expanded = expand_prompt_llm(raw_prompt, style)
    style_data = STYLES.get(style, STYLES["Auto"])
    final_pos = IL_POS + style_data["pos"] + expanded
    final_neg = IL_NEG + style_data["neg"] + (negative_prompt or "").strip()

    lora_attached = False  # load_lora_weights() succeeded
    lora_fused = False     # fuse_lora() succeeded
    try:
        pipe.to("cuda")

        # LoRA (best-effort: a failure falls back to the plain model)
        lora_data = LORAS.get(lora_name)
        if lora_data:
            try:
                lp = hf_hub_download(
                    repo_id=lora_data["repo"],
                    filename=lora_data["file"],
                    token=HF_TOKEN,
                )
                pipe.load_lora_weights(lp)
                lora_attached = True
                pipe.fuse_lora(lora_scale=float(lora_strength))
                lora_fused = True
            except Exception as e:
                print(f"LoRA failed, skipping: {e}")
                if lora_attached:
                    # Loaded but not fused: detach it so "skipping" is true
                    # and the adapter does not influence this image.
                    pipe.unload_lora_weights()
                    lora_attached = False

        generator = torch.Generator(device="cpu").manual_seed(seed)
        result = pipe(
            prompt=final_pos,
            negative_prompt=final_neg,
            width=int(width),
            height=int(height),
            num_inference_steps=int(steps),
            guidance_scale=float(guidance),
            generator=generator,
            clip_skip=1,
        )
    finally:
        # Always restore the shared pipeline, even when inference raises;
        # otherwise a fused LoRA would leak into every later request.
        if lora_fused:
            pipe.unfuse_lora()
        if lora_attached:
            pipe.unload_lora_weights()
        pipe.to("cpu")

    debug = f"**Expanded prompt:**\n\n{final_pos}" if show_expanded else ""
    return result.images[0], seed, debug
# ── CSS ─────────────────────────────────────────────────────────────────
# Custom stylesheet passed to gr.Blocks(css=css).
# Class selectors correspond to names used in the UI section of this file:
#   - elem_classes on components: img-out, seed-pill, style-wrap, debug-box, gen-btn
#   - classes in raw gr.HTML markup: topbar, topbar-title, gpu-pill, card, card-label
# The final rule hides the page footer.
css = """
* { box-sizing: border-box; margin: 0; padding: 0; }
body, .gradio-container {
background: #07070e !important;
font-family: 'Inter', system-ui, -apple-system, sans-serif !important;
max-width: 500px !important;
margin: 0 auto !important;
padding: 8px !important;
}
.topbar {
display: flex;
align-items: center;
justify-content: space-between;
padding: 10px 2px 14px;
}
.topbar-title {
color: #e8e0ff;
font-size: 0.95em;
font-weight: 800;
}
.gpu-pill {
background: #1aff7a18;
border: 1px solid #1aff7a44;
color: #1aff7a;
font-size: 0.6em;
font-weight: 800;
padding: 4px 12px;
border-radius: 20px;
letter-spacing: 1.5px;
text-transform: uppercase;
}
.img-out {
background: #0d0d1a;
border: 1px solid #16162a;
border-radius: 20px;
overflow: hidden;
margin-bottom: 8px;
min-height: 380px;
display: flex;
align-items: center;
justify-content: center;
}
.img-out img {
width: 100% !important;
border-radius: 20px;
display: block;
}
.seed-pill input[type=number] {
background: transparent !important;
border: none !important;
color: #2e2848 !important;
font-size: 0.7em !important;
text-align: center !important;
padding: 2px !important;
width: 100% !important;
}
.card {
background: #0d0d1a;
border: 1px solid #16162a;
border-radius: 14px;
padding: 14px;
margin-bottom: 8px;
}
.card-label {
color: #3d3060;
font-size: 0.62em;
font-weight: 800;
text-transform: uppercase;
letter-spacing: 2px;
margin-bottom: 8px;
}
textarea {
background: transparent !important;
border: none !important;
color: #c8b8f0 !important;
font-size: 15px !important;
line-height: 1.6 !important;
padding: 0 !important;
resize: none !important;
box-shadow: none !important;
width: 100% !important;
outline: none !important;
}
textarea::placeholder { color: #252038 !important; }
textarea:focus {
outline: none !important;
box-shadow: none !important;
border: none !important;
}
.style-wrap .gr-radio {
display: flex !important;
flex-wrap: wrap !important;
gap: 6px !important;
}
.style-wrap label {
background: #0d0d1a !important;
border: 1px solid #1a1a2e !important;
border-radius: 30px !important;
color: #4a3a6a !important;
font-size: 0.75em !important;
font-weight: 600 !important;
padding: 6px 14px !important;
cursor: pointer !important;
transition: all 0.15s ease !important;
white-space: nowrap !important;
}
.style-wrap label:has(input:checked) {
background: #18083a !important;
border-color: #7744ee !important;
color: #bb99ff !important;
box-shadow: 0 0 10px #7744ee33 !important;
}
.style-wrap input[type=radio] { display: none !important; }
.gradio-accordion {
background: #0d0d1a !important;
border: 1px solid #16162a !important;
border-radius: 14px !important;
margin-bottom: 8px !important;
overflow: hidden !important;
}
.gradio-accordion .label-wrap button {
color: #4a3a6a !important;
font-size: 0.72em !important;
font-weight: 700 !important;
text-transform: uppercase !important;
letter-spacing: 1.5px !important;
padding: 12px 16px !important;
}
.gradio-slider {
background: transparent !important;
border: none !important;
padding: 4px 0 10px !important;
}
input[type=range] {
accent-color: #6633bb !important;
width: 100% !important;
}
input[type=number] {
background: #0a0a14 !important;
border: 1px solid #18182a !important;
border-radius: 10px !important;
color: #9977cc !important;
font-size: 13px !important;
padding: 8px 10px !important;
}
input[type=checkbox] { accent-color: #6633bb !important; }
.gradio-checkbox label span {
color: #4a3a6a !important;
font-size: 0.75em !important;
font-weight: 600 !important;
}
.gradio-dropdown {
background: #0a0a14 !important;
border: 1px solid #18182a !important;
border-radius: 10px !important;
}
label > span:first-child {
color: #3a2d55 !important;
font-size: 0.7em !important;
font-weight: 700 !important;
text-transform: uppercase !important;
letter-spacing: 1px !important;
}
.debug-box {
background: #080814;
border: 1px solid #111122;
border-radius: 10px;
padding: 10px 12px;
color: #443366;
font-size: 0.7em;
line-height: 1.7;
font-family: monospace;
word-break: break-word;
margin-bottom: 8px;
min-height: 10px;
}
.gen-btn button {
background: linear-gradient(135deg, #4a1aaa 0%, #2d0e77 100%) !important;
border: 1px solid #6633cc !important;
border-radius: 14px !important;
color: #fff !important;
font-size: 0.88em !important;
font-weight: 900 !important;
padding: 17px !important;
width: 100% !important;
letter-spacing: 2px !important;
text-transform: uppercase !important;
box-shadow: 0 4px 24px #4a1aaa55 !important;
transition: all 0.15s ease !important;
margin-top: 6px !important;
}
.gen-btn button:hover {
box-shadow: 0 6px 32px #4a1aaa99 !important;
transform: translateY(-1px) !important;
}
.gen-btn button:active {
transform: scale(0.98) !important;
box-shadow: 0 2px 12px #4a1aaa33 !important;
}
footer, .built-with { display: none !important; }
"""
# ── UI ──────────────────────────────────────────────────────────────────
# Single-column layout: result image and seed on top, then prompt, style
# picker, generate button, expanded-prompt readout, and two collapsed
# accordions for advanced settings and LoRA selection.
with gr.Blocks(css=css, title="ImageGen") as demo:
    # Header bar, styled by .topbar / .gpu-pill.
    gr.HTML("""
<div class="topbar">
<span class="topbar-title">Nova 3DCG XL</span>
<span class="gpu-pill">⚡ ZeroGPU</span>
</div>
""")

    # Outputs
    output_image = gr.Image(
        show_label=False, type="pil",
        height=460, elem_classes="img-out",
    )
    used_seed = gr.Number(
        label="seed", interactive=False,
        elem_classes="seed-pill",
    )

    # Prompt card. A real container carries the "card" class: an opening
    # <div> in one gr.HTML and its closing tag in another cannot wrap the
    # Textbox, because each gr.HTML renders as its own element.
    with gr.Column(elem_classes="card"):
        gr.HTML('<div class="card-label">✦ Prompt — describe your character</div>')
        prompt = gr.Textbox(
            show_label=False,
            placeholder="warrior woman in red armour, glowing sword, forest background...",
            lines=3,
        )

    gr.HTML('<div class="card-label" style="padding:4px 2px 8px;color:#3d3060;font-size:0.62em;font-weight:800;text-transform:uppercase;letter-spacing:2px;">Style</div>')
    style = gr.Radio(
        choices=list(STYLES.keys()),
        value="Auto",
        show_label=False,
        elem_classes="style-wrap",
    )

    generate_btn = gr.Button(
        "Generate ✦", variant="primary",
        size="lg", elem_classes="gen-btn",
    )
    # Receives the third value returned by generate().
    expanded_out = gr.Markdown(
        value="",
        elem_classes="debug-box",
    )

    with gr.Accordion("⚙️ Settings", open=False):
        gr.HTML('<div style="height:6px"></div>')
        negative_prompt = gr.Textbox(
            label="Negative Prompt",
            value=(
                "worst quality, low quality, bad anatomy, bad hands, "
                "extra limbs, missing limbs, watermark, signature, "
                "blurry, deformed, ugly, text"
            ),
            lines=2,
        )
        with gr.Row():
            width = gr.Slider(512, 1024, value=832, step=64, label="Width")
            height = gr.Slider(512, 1216, value=1216, step=64, label="Height")
        steps = gr.Slider(20, 60, value=30, step=1, label="Steps")
        guidance = gr.Slider(1.0, 10.0, value=6.0, step=0.5, label="CFG Scale")
        with gr.Row():
            seed = gr.Number(
                label="Seed", value=42, precision=0,
                minimum=0, maximum=2**32-1, scale=3,
            )
            randomize = gr.Checkbox(label="Random seed", value=True, scale=1)
        show_expanded = gr.Checkbox(
            label="Show expanded prompt",
            value=True,
        )

    with gr.Accordion("🎨 LoRA", open=False):
        gr.HTML('<div style="height:6px"></div>')
        lora_name = gr.Dropdown(choices=list(LORAS.keys()), value="None", label="LoRA")
        lora_strength = gr.Slider(0.1, 1.0, value=0.7, step=0.05, label="Strength")

    # Input order must match generate()'s positional parameters.
    generate_btn.click(
        fn=generate,
        inputs=[
            prompt, negative_prompt, style, lora_name, lora_strength,
            width, height, steps, guidance, seed, randomize, show_expanded,
        ],
        outputs=[output_image, used_seed, expanded_out],
    )

demo.launch()