Spaces:

tomiconic
/

ImageGen

Configuration error

App Files Community

tomiconic committed on Mar 21

Commit

1c80f45

verified ·

1 Parent(s): c51a859

Update app.py

Browse files

Files changed (1) hide show

app.py +72 -79

app.py CHANGED Viewed

@@ -5,104 +5,101 @@ from diffusers import StableDiffusionXLPipeline, DPMSolverMultistepScheduler
 from huggingface_hub import hf_hub_download, InferenceClient
 import random
 import os
-# ── HF Inference client (prompt expansion LLM) ────────────────────────────────
-HF_TOKEN = os.environ.get("HF_TOKEN", None)
 llm_client = InferenceClient(
     model="mistralai/Mistral-7B-Instruct-v0.3",
     token=HF_TOKEN,
 )
-# ── Image model — CyberIllustrious ────────────────────────────────────────────
-MODEL_REPO = "cyberdelia/latest_sdxl_models"
-MODEL_FILE = "CyberIllustrious_V8.0alt.safetensors"
-IL_POS     = "masterpiece, best quality, very aesthetic, absurdres, "
-IL_NEG     = "worst quality, low quality, bad quality, ugly, "
-print("Downloading CyberIllustrious...")
-local_path = hf_hub_download(
-    repo_id=MODEL_REPO,
-    filename=MODEL_FILE,
-    token=HF_TOKEN,
-)
-print("Loading pipeline...")
-pipe = StableDiffusionXLPipeline.from_single_file(local_path, torch_dtype=torch.float16)
-pipe.scheduler = DPMSolverMultistepScheduler.from_config(
-    pipe.scheduler.config, use_karras_sigmas=True
-)
-pipe.enable_attention_slicing()
-print("Ready.")
-# ── LLM prompt expansion ──────────────────────────────────────────────────────
-EXPANSION_SYSTEM = """You are an expert Stable Diffusion prompt engineer specialising in photorealistic and cinematic image generation.
-Your job: take a short user description and rewrite it as a detailed, accurate image generation prompt.
 Rules:
-- PRESERVE every specific detail from the input — if they say "one window open", "rainbow", "red door", those MUST appear
-- Wrap unique/specific details in attention weights like (one window open:1.4) or (rainbow:1.3)
-- Add: lighting description, camera/lens style, atmosphere, material textures, composition
-- Add quality boosters appropriate to the scene
-- Do NOT add people unless the user mentioned people
-- Do NOT change the subject or invent things not implied
 - Return ONLY the final prompt — no explanation, no preamble, no quotes
-- Keep it under 120 words
-- Use comma-separated tags and phrases, not full sentences"""
 def expand_prompt_llm(raw_prompt, style):
-    """Use Mistral to expand the user's short prompt Fooocus-style."""
     if not raw_prompt.strip():
         return ""
     style_hint = f" The desired style is: {style}." if style != "Auto" else ""
-    user_msg = f"Expand this into a detailed image generation prompt:{style_hint}\n\n{raw_prompt.strip()}"
     try:
         response = llm_client.chat_completion(
             messages=[
                 {"role": "system", "content": EXPANSION_SYSTEM},
                 {"role": "user",   "content": user_msg},
             ],
-            max_tokens=200,
             temperature=0.7,
         )
         expanded = response.choices[0].message.content.strip()
-        # Clean up any accidental quotes or preamble
         expanded = expanded.strip('"').strip("'")
         if expanded.lower().startswith("prompt:"):
             expanded = expanded[7:].strip()
         return expanded
     except Exception as e:
-        print(f"LLM expansion failed, using raw prompt: {e}")
         return raw_prompt.strip()
 # ── Style presets ─────────────────────────────────────────────────────────────
 STYLES = {
-    "Auto":         {"pos": "", "neg": ""},
-    "📸 Photo":     {
-        "pos": "RAW photo, photorealistic, DSLR, 8k uhd, film grain, Fujifilm XT3, sharp focus, ",
-        "neg": "painting, illustration, cartoon, anime, cgi, render, ",
     },
-    "🎬 Cinematic": {
-        "pos": "cinematic movie still, anamorphic lens, film grain, color graded, dramatic lighting, ",
-        "neg": "flat lighting, amateur, snapshot, overexposed, ",
     },
-    "🖼️ Portrait":  {
-        "pos": "professional portrait, studio lighting, 85mm lens, bokeh, sharp eyes, skin texture, ",
-        "neg": "wide angle distortion, bad eyes, cropped head, ",
     },
-    "🌆 Neon City": {
-        "pos": "cyberpunk city, neon lights, rain reflections, night scene, blade runner aesthetic, ",
-        "neg": "daytime, rural, nature, warm tones, ",
     },
-    "✨ Fantasy":    {
-        "pos": "fantasy art, epic, magical atmosphere, volumetric lighting, concept art, artstation, ",
-        "neg": "modern, mundane, flat, ",
     },
-    "🎨 Painterly":  {
-        "pos": "oil painting, impressionist, visible brushstrokes, canvas texture, museum quality, ",
-        "neg": "photo, digital flat art, ",
     },
 }
@@ -133,18 +130,15 @@ def generate(raw_prompt, negative_prompt, style, lora_name, lora_strength,
         seed = random.randint(0, 2**32 - 1)
     seed = int(seed)
-    # ── LLM expansion ──
     expanded = expand_prompt_llm(raw_prompt, style)
-    # ── Build final prompt ──
     style_data = STYLES.get(style, STYLES["Auto"])
     final_pos  = IL_POS + style_data["pos"] + expanded
     final_neg  = IL_NEG + style_data["neg"] + negative_prompt.strip()
-    # ── Move to GPU ──
     pipe.to("cuda")
-    # ── Load LoRA ──
     lora_loaded = False
     lora_data = LORAS.get(lora_name)
     if lora_data:
@@ -179,10 +173,8 @@ def generate(raw_prompt, negative_prompt, style, lora_name, lora_strength,
     pipe.to("cpu")
-    # ── Debug output ──
-    debug_text = f"**Expanded prompt sent to model:**\n\n{final_pos}" if show_expanded else ""
-    return result.images[0], seed, debug_text
 # ── CSS ───────────────────────────────────────────────────────────────────────
 css = """
@@ -362,7 +354,6 @@ label > span:first-child {
     letter-spacing: 1px !important;
 }
-/* Expanded prompt debug box */
 .debug-box {
     background: #080814;
     border: 1px solid #111122;
@@ -374,6 +365,7 @@ label > span:first-child {
     font-family: monospace;
     word-break: break-word;
     margin-bottom: 8px;
 }
 .gen-btn button {
@@ -397,6 +389,7 @@ label > span:first-child {
 }
 .gen-btn button:active {
     transform: scale(0.98) !important;
 }
 footer, .built-with { display: none !important; }
@@ -407,7 +400,7 @@ with gr.Blocks(css=css, title="ImageGen") as demo:
     gr.HTML("""
     <div class="topbar">
-        <span class="topbar-title">CyberIllustrious</span>
         <span class="gpu-pill">⚡ ZeroGPU</span>
     </div>
     """)
@@ -421,10 +414,10 @@ with gr.Blocks(css=css, title="ImageGen") as demo:
         elem_classes="seed-pill",
     )
-    gr.HTML('<div class="card"><div class="card-label">✦ Prompt — write anything, short or long</div>')
     prompt = gr.Textbox(
         show_label=False,
-        placeholder="building with rainbow and one window open...",
         lines=3,
     )
     gr.HTML('</div>')
@@ -445,7 +438,6 @@ with gr.Blocks(css=css, title="ImageGen") as demo:
     expanded_out = gr.Markdown(
         value="",
         elem_classes="debug-box",
-        visible=True,
     )
     with gr.Accordion("⚙️  Settings", open=False):
@@ -455,7 +447,8 @@ with gr.Blocks(css=css, title="ImageGen") as demo:
             label="Negative Prompt",
             value=(
                 "worst quality, low quality, bad anatomy, bad hands, "
-                "signature, watermarks, ugly, blurry, deformed"
             ),
             lines=2,
         )
@@ -465,7 +458,7 @@ with gr.Blocks(css=css, title="ImageGen") as demo:
             height = gr.Slider(512, 1216, value=1216, step=64, label="Height")
         steps    = gr.Slider(20, 60,    value=30,  step=1,   label="Steps")
-        guidance = gr.Slider(1.0, 10.0, value=5.0, step=0.5, label="CFG Scale")
         with gr.Row():
             seed = gr.Number(
@@ -475,14 +468,14 @@ with gr.Blocks(css=css, title="ImageGen") as demo:
             randomize = gr.Checkbox(label="Random seed", value=True, scale=1)
         show_expanded = gr.Checkbox(
-            label="Show expanded prompt (see what the LLM wrote)",
             value=True,
         )
     with gr.Accordion("🎨  LoRA", open=False):
         gr.HTML('<div style="height:6px"></div>')
-        lora_name     = gr.Dropdown(choices=list(LORAS.keys()), value="None", label="LoRA preset")
-        lora_strength = gr.Slider(0.1, 1.0, value=0.7, step=0.05, label="LoRA Strength")
     generate_btn.click(
         fn=generate,

 from huggingface_hub import hf_hub_download, InferenceClient
 import random
 import os
+import re
+# ── Config ────────────────────────────────────────────────────────────────────
+HF_TOKEN   = os.environ.get("HF_TOKEN", None)
+MODEL_REPO = "John6666/nova-3dcg-xl-illustrious-v40-sdxl"
+# Quality tags for Illustrious-based models
+IL_POS = "masterpiece, best quality, very aesthetic, absurdres, "
+IL_NEG = "worst quality, low quality, bad quality, ugly, "
+# ── LLM client ────────────────────────────────────────────────────────────────
 llm_client = InferenceClient(
     model="mistralai/Mistral-7B-Instruct-v0.3",
     token=HF_TOKEN,
 )
+EXPANSION_SYSTEM = """You are an expert Stable Diffusion prompt engineer specialising in 3DCG character art and illustration.
+Your job: take a short user description and rewrite it as a detailed, accurate image generation prompt optimised for a 3D CGI character art model (Nova 3DCG XL).
 Rules:
+- PRESERVE every specific detail — colours, numbers, states, accessories, clothing
+- Wrap unique specific details in attention weights e.g. (red scarf:1.4), (one eye closed:1.3)
+- Add: character pose, expression, lighting, background atmosphere, material quality, render style
+- Add 3DCG-appropriate quality boosters: sharp edges, subsurface scattering, ray tracing, ambient occlusion
+- Do NOT add NSFW content
+- Do NOT invent things not implied by the user
 - Return ONLY the final prompt — no explanation, no preamble, no quotes
+- Keep under 130 words
+- Use comma-separated tags and phrases"""
 def expand_prompt_llm(raw_prompt, style):
     if not raw_prompt.strip():
         return ""
     style_hint = f" The desired style is: {style}." if style != "Auto" else ""
+    user_msg = f"Expand this into a detailed 3DCG character art prompt:{style_hint}\n\n{raw_prompt.strip()}"
     try:
         response = llm_client.chat_completion(
             messages=[
                 {"role": "system", "content": EXPANSION_SYSTEM},
                 {"role": "user",   "content": user_msg},
             ],
+            max_tokens=220,
             temperature=0.7,
         )
         expanded = response.choices[0].message.content.strip()
         expanded = expanded.strip('"').strip("'")
         if expanded.lower().startswith("prompt:"):
             expanded = expanded[7:].strip()
         return expanded
     except Exception as e:
+        print(f"LLM expansion failed: {e}")
         return raw_prompt.strip()
+# ── Load model ────────────────────────────────────────────────────────────────
+print(f"Loading Nova 3DCG XL from {MODEL_REPO}...")
+pipe = StableDiffusionXLPipeline.from_pretrained(
+    MODEL_REPO,
+    torch_dtype=torch.float16,
+    token=HF_TOKEN,
+)
+pipe.scheduler = DPMSolverMultistepScheduler.from_config(
+    pipe.scheduler.config,
+    use_karras_sigmas=True,
+)
+pipe.enable_attention_slicing()
+print("Pipeline ready.")
 # ── Style presets ─────────────────────────────────────────────────────────────
 STYLES = {
+    "Auto":           {"pos": "", "neg": ""},
+    "🎮 3DCG Render": {
+        "pos": "3DCG render, Pixar style, ray tracing, subsurface scattering, ambient occlusion, sharp edges, studio lighting, ",
+        "neg": "flat, 2D, anime flat colour, sketch, ",
     },
+    "⚔️ Fantasy":      {
+        "pos": "fantasy character, epic armour, magical atmosphere, dramatic lighting, volumetric fog, concept art, artstation, ",
+        "neg": "modern, mundane, sci-fi, ",
     },
+    "🤖 Sci-Fi":       {
+        "pos": "sci-fi character, futuristic suit, neon accents, holographic elements, dark background, cinematic, ",
+        "neg": "medieval, fantasy, nature, ",
     },
+    "🌸 Stylised":     {
+        "pos": "stylised illustration, vibrant colours, soft cel shading, clean lineart, anime-adjacent, ",
+        "neg": "photorealistic, gritty, dark, ",
     },
+    "🎬 Cinematic":    {
+        "pos": "cinematic portrait, dramatic rim lighting, shallow depth of field, film grain, color graded, ",
+        "neg": "flat, overexposed, sketch, ",
     },
+    "🏙️ Urban":        {
+        "pos": "urban streetwear character, city background, neon lights, night scene, realistic clothing, ",
+        "neg": "fantasy, medieval, nature, ",
     },
 }
         seed = random.randint(0, 2**32 - 1)
     seed = int(seed)
+    # LLM expansion
     expanded = expand_prompt_llm(raw_prompt, style)
     style_data = STYLES.get(style, STYLES["Auto"])
     final_pos  = IL_POS + style_data["pos"] + expanded
     final_neg  = IL_NEG + style_data["neg"] + negative_prompt.strip()
     pipe.to("cuda")
+    # LoRA
     lora_loaded = False
     lora_data = LORAS.get(lora_name)
     if lora_data:
     pipe.to("cpu")
+    debug = f"**Expanded prompt:**\n\n{final_pos}" if show_expanded else ""
+    return result.images[0], seed, debug
 # ── CSS ───────────────────────────────────────────────────────────────────────
 css = """
     letter-spacing: 1px !important;
 }
 .debug-box {
     background: #080814;
     border: 1px solid #111122;
     font-family: monospace;
     word-break: break-word;
     margin-bottom: 8px;
+    min-height: 10px;
 }
 .gen-btn button {
 }
 .gen-btn button:active {
     transform: scale(0.98) !important;
+    box-shadow: 0 2px 12px #4a1aaa33 !important;
 }
 footer, .built-with { display: none !important; }
     gr.HTML("""
     <div class="topbar">
+        <span class="topbar-title">Nova 3DCG XL</span>
         <span class="gpu-pill">⚡ ZeroGPU</span>
     </div>
     """)
         elem_classes="seed-pill",
     )
+    gr.HTML('<div class="card"><div class="card-label">✦ Prompt — describe your character</div>')
     prompt = gr.Textbox(
         show_label=False,
+        placeholder="warrior woman in red armour, glowing sword, forest background...",
         lines=3,
     )
     gr.HTML('</div>')
     expanded_out = gr.Markdown(
         value="",
         elem_classes="debug-box",
     )
     with gr.Accordion("⚙️  Settings", open=False):
             label="Negative Prompt",
             value=(
                 "worst quality, low quality, bad anatomy, bad hands, "
+                "extra limbs, missing limbs, watermark, signature, "
+                "blurry, deformed, ugly, text"
             ),
             lines=2,
         )
             height = gr.Slider(512, 1216, value=1216, step=64, label="Height")
         steps    = gr.Slider(20, 60,    value=30,  step=1,   label="Steps")
+        guidance = gr.Slider(1.0, 10.0, value=6.0, step=0.5, label="CFG Scale")
         with gr.Row():
             seed = gr.Number(
             randomize = gr.Checkbox(label="Random seed", value=True, scale=1)
         show_expanded = gr.Checkbox(
+            label="Show expanded prompt",
             value=True,
         )
     with gr.Accordion("🎨  LoRA", open=False):
         gr.HTML('<div style="height:6px"></div>')
+        lora_name     = gr.Dropdown(choices=list(LORAS.keys()), value="None", label="LoRA")
+        lora_strength = gr.Slider(0.1, 1.0, value=0.7, step=0.05, label="Strength")
     generate_btn.click(
         fn=generate,