tomiconic committed on
Commit
1c80f45
·
verified ·
1 Parent(s): c51a859

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +72 -79
app.py CHANGED
@@ -5,104 +5,101 @@ from diffusers import StableDiffusionXLPipeline, DPMSolverMultistepScheduler
5
  from huggingface_hub import hf_hub_download, InferenceClient
6
  import random
7
  import os
 
8
 
9
- # โ”€โ”€ HF Inference client (prompt expansion LLM) โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
10
- HF_TOKEN = os.environ.get("HF_TOKEN", None)
 
 
 
 
 
 
 
11
  llm_client = InferenceClient(
12
  model="mistralai/Mistral-7B-Instruct-v0.3",
13
  token=HF_TOKEN,
14
  )
15
 
16
- # โ”€โ”€ Image model โ€” CyberIllustrious โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
17
- MODEL_REPO = "cyberdelia/latest_sdxl_models"
18
- MODEL_FILE = "CyberIllustrious_V8.0alt.safetensors"
19
- IL_POS = "masterpiece, best quality, very aesthetic, absurdres, "
20
- IL_NEG = "worst quality, low quality, bad quality, ugly, "
21
 
22
- print("Downloading CyberIllustrious...")
23
- local_path = hf_hub_download(
24
- repo_id=MODEL_REPO,
25
- filename=MODEL_FILE,
26
- token=HF_TOKEN,
27
- )
28
- print("Loading pipeline...")
29
- pipe = StableDiffusionXLPipeline.from_single_file(local_path, torch_dtype=torch.float16)
30
- pipe.scheduler = DPMSolverMultistepScheduler.from_config(
31
- pipe.scheduler.config, use_karras_sigmas=True
32
- )
33
- pipe.enable_attention_slicing()
34
- print("Ready.")
35
-
36
- # โ”€โ”€ LLM prompt expansion โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
37
- EXPANSION_SYSTEM = """You are an expert Stable Diffusion prompt engineer specialising in photorealistic and cinematic image generation.
38
-
39
- Your job: take a short user description and rewrite it as a detailed, accurate image generation prompt.
40
 
41
  Rules:
42
- - PRESERVE every specific detail from the input — if they say "one window open", "rainbow", "red door", those MUST appear
43
- - Wrap unique/specific details in attention weights like (one window open:1.4) or (rainbow:1.3)
44
- - Add: lighting description, camera/lens style, atmosphere, material textures, composition
45
- - Add quality boosters appropriate to the scene
46
- - Do NOT add people unless the user mentioned people
47
- - Do NOT change the subject or invent things not implied
48
  - Return ONLY the final prompt — no explanation, no preamble, no quotes
49
- - Keep it under 120 words
50
- - Use comma-separated tags and phrases, not full sentences"""
51
 
52
  def expand_prompt_llm(raw_prompt, style):
53
- """Use Mistral to expand the user's short prompt Fooocus-style."""
54
  if not raw_prompt.strip():
55
  return ""
56
-
57
  style_hint = f" The desired style is: {style}." if style != "Auto" else ""
58
-
59
- user_msg = f"Expand this into a detailed image generation prompt:{style_hint}\n\n{raw_prompt.strip()}"
60
-
61
  try:
62
  response = llm_client.chat_completion(
63
  messages=[
64
  {"role": "system", "content": EXPANSION_SYSTEM},
65
  {"role": "user", "content": user_msg},
66
  ],
67
- max_tokens=200,
68
  temperature=0.7,
69
  )
70
  expanded = response.choices[0].message.content.strip()
71
- # Clean up any accidental quotes or preamble
72
  expanded = expanded.strip('"').strip("'")
73
  if expanded.lower().startswith("prompt:"):
74
  expanded = expanded[7:].strip()
75
  return expanded
76
  except Exception as e:
77
- print(f"LLM expansion failed, using raw prompt: {e}")
78
  return raw_prompt.strip()
79
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  # โ”€โ”€ Style presets โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
81
  STYLES = {
82
- "Auto": {"pos": "", "neg": ""},
83
- "📸 Photo": {
84
- "pos": "RAW photo, photorealistic, DSLR, 8k uhd, film grain, Fujifilm XT3, sharp focus, ",
85
- "neg": "painting, illustration, cartoon, anime, cgi, render, ",
86
  },
87
- "🎬 Cinematic": {
88
- "pos": "cinematic movie still, anamorphic lens, film grain, color graded, dramatic lighting, ",
89
- "neg": "flat lighting, amateur, snapshot, overexposed, ",
90
  },
91
- "🖼️ Portrait": {
92
- "pos": "professional portrait, studio lighting, 85mm lens, bokeh, sharp eyes, skin texture, ",
93
- "neg": "wide angle distortion, bad eyes, cropped head, ",
94
  },
95
- "🌆 Neon City": {
96
- "pos": "cyberpunk city, neon lights, rain reflections, night scene, blade runner aesthetic, ",
97
- "neg": "daytime, rural, nature, warm tones, ",
98
  },
99
- "✨ Fantasy": {
100
- "pos": "fantasy art, epic, magical atmosphere, volumetric lighting, concept art, artstation, ",
101
- "neg": "modern, mundane, flat, ",
102
  },
103
- "🎨 Painterly": {
104
- "pos": "oil painting, impressionist, visible brushstrokes, canvas texture, museum quality, ",
105
- "neg": "photo, digital flat art, ",
106
  },
107
  }
108
 
@@ -133,18 +130,15 @@ def generate(raw_prompt, negative_prompt, style, lora_name, lora_strength,
133
  seed = random.randint(0, 2**32 - 1)
134
  seed = int(seed)
135
 
136
- # ── LLM expansion ──
137
  expanded = expand_prompt_llm(raw_prompt, style)
138
-
139
- # ── Build final prompt ──
140
  style_data = STYLES.get(style, STYLES["Auto"])
141
  final_pos = IL_POS + style_data["pos"] + expanded
142
  final_neg = IL_NEG + style_data["neg"] + negative_prompt.strip()
143
 
144
- # ── Move to GPU ──
145
  pipe.to("cuda")
146
 
147
- # ── Load LoRA ──
148
  lora_loaded = False
149
  lora_data = LORAS.get(lora_name)
150
  if lora_data:
@@ -179,10 +173,8 @@ def generate(raw_prompt, negative_prompt, style, lora_name, lora_strength,
179
 
180
  pipe.to("cpu")
181
 
182
- # ── Debug output ──
183
- debug_text = f"**Expanded prompt sent to model:**\n\n{final_pos}" if show_expanded else ""
184
-
185
- return result.images[0], seed, debug_text
186
 
187
  # โ”€โ”€ CSS โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
188
  css = """
@@ -362,7 +354,6 @@ label > span:first-child {
362
  letter-spacing: 1px !important;
363
  }
364
 
365
- /* Expanded prompt debug box */
366
  .debug-box {
367
  background: #080814;
368
  border: 1px solid #111122;
@@ -374,6 +365,7 @@ label > span:first-child {
374
  font-family: monospace;
375
  word-break: break-word;
376
  margin-bottom: 8px;
 
377
  }
378
 
379
  .gen-btn button {
@@ -397,6 +389,7 @@ label > span:first-child {
397
  }
398
  .gen-btn button:active {
399
  transform: scale(0.98) !important;
 
400
  }
401
 
402
  footer, .built-with { display: none !important; }
@@ -407,7 +400,7 @@ with gr.Blocks(css=css, title="ImageGen") as demo:
407
 
408
  gr.HTML("""
409
  <div class="topbar">
410
- <span class="topbar-title">CyberIllustrious</span>
411
  <span class="gpu-pill">⚡ ZeroGPU</span>
412
  </div>
413
  """)
@@ -421,10 +414,10 @@ with gr.Blocks(css=css, title="ImageGen") as demo:
421
  elem_classes="seed-pill",
422
  )
423
 
424
- gr.HTML('<div class="card"><div class="card-label">✦ Prompt — write anything, short or long</div>')
425
  prompt = gr.Textbox(
426
  show_label=False,
427
- placeholder="building with rainbow and one window open...",
428
  lines=3,
429
  )
430
  gr.HTML('</div>')
@@ -445,7 +438,6 @@ with gr.Blocks(css=css, title="ImageGen") as demo:
445
  expanded_out = gr.Markdown(
446
  value="",
447
  elem_classes="debug-box",
448
- visible=True,
449
  )
450
 
451
  with gr.Accordion("⚙️ Settings", open=False):
@@ -455,7 +447,8 @@ with gr.Blocks(css=css, title="ImageGen") as demo:
455
  label="Negative Prompt",
456
  value=(
457
  "worst quality, low quality, bad anatomy, bad hands, "
458
- "signature, watermarks, ugly, blurry, deformed"
 
459
  ),
460
  lines=2,
461
  )
@@ -465,7 +458,7 @@ with gr.Blocks(css=css, title="ImageGen") as demo:
465
  height = gr.Slider(512, 1216, value=1216, step=64, label="Height")
466
 
467
  steps = gr.Slider(20, 60, value=30, step=1, label="Steps")
468
- guidance = gr.Slider(1.0, 10.0, value=5.0, step=0.5, label="CFG Scale")
469
 
470
  with gr.Row():
471
  seed = gr.Number(
@@ -475,14 +468,14 @@ with gr.Blocks(css=css, title="ImageGen") as demo:
475
  randomize = gr.Checkbox(label="Random seed", value=True, scale=1)
476
 
477
  show_expanded = gr.Checkbox(
478
- label="Show expanded prompt (see what the LLM wrote)",
479
  value=True,
480
  )
481
 
482
  with gr.Accordion("🎨 LoRA", open=False):
483
  gr.HTML('<div style="height:6px"></div>')
484
- lora_name = gr.Dropdown(choices=list(LORAS.keys()), value="None", label="LoRA preset")
485
- lora_strength = gr.Slider(0.1, 1.0, value=0.7, step=0.05, label="LoRA Strength")
486
 
487
  generate_btn.click(
488
  fn=generate,
 
5
  from huggingface_hub import hf_hub_download, InferenceClient
6
  import random
7
  import os
8
+ import re
9
 
10
+ # โ”€โ”€ Config โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
11
+ HF_TOKEN = os.environ.get("HF_TOKEN", None)
12
+ MODEL_REPO = "John6666/nova-3dcg-xl-illustrious-v40-sdxl"
13
+
14
+ # Quality tags for Illustrious-based models
15
+ IL_POS = "masterpiece, best quality, very aesthetic, absurdres, "
16
+ IL_NEG = "worst quality, low quality, bad quality, ugly, "
17
+
18
+ # โ”€โ”€ LLM client โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
19
  llm_client = InferenceClient(
20
  model="mistralai/Mistral-7B-Instruct-v0.3",
21
  token=HF_TOKEN,
22
  )
23
 
24
+ EXPANSION_SYSTEM = """You are an expert Stable Diffusion prompt engineer specialising in 3DCG character art and illustration.
 
 
 
 
25
 
26
+ Your job: take a short user description and rewrite it as a detailed, accurate image generation prompt optimised for a 3D CGI character art model (Nova 3DCG XL).
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
  Rules:
29
+ - PRESERVE every specific detail — colours, numbers, states, accessories, clothing
30
+ - Wrap unique specific details in attention weights e.g. (red scarf:1.4), (one eye closed:1.3)
31
+ - Add: character pose, expression, lighting, background atmosphere, material quality, render style
32
+ - Add 3DCG-appropriate quality boosters: sharp edges, subsurface scattering, ray tracing, ambient occlusion
33
+ - Do NOT add NSFW content
34
+ - Do NOT invent things not implied by the user
35
  - Return ONLY the final prompt — no explanation, no preamble, no quotes
36
+ - Keep under 130 words
37
+ - Use comma-separated tags and phrases"""
38
 
39
  def expand_prompt_llm(raw_prompt, style):
 
40
  if not raw_prompt.strip():
41
  return ""
 
42
  style_hint = f" The desired style is: {style}." if style != "Auto" else ""
43
+ user_msg = f"Expand this into a detailed 3DCG character art prompt:{style_hint}\n\n{raw_prompt.strip()}"
 
 
44
  try:
45
  response = llm_client.chat_completion(
46
  messages=[
47
  {"role": "system", "content": EXPANSION_SYSTEM},
48
  {"role": "user", "content": user_msg},
49
  ],
50
+ max_tokens=220,
51
  temperature=0.7,
52
  )
53
  expanded = response.choices[0].message.content.strip()
 
54
  expanded = expanded.strip('"').strip("'")
55
  if expanded.lower().startswith("prompt:"):
56
  expanded = expanded[7:].strip()
57
  return expanded
58
  except Exception as e:
59
+ print(f"LLM expansion failed: {e}")
60
  return raw_prompt.strip()
61
 
62
+ # โ”€โ”€ Load model โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
63
+ print(f"Loading Nova 3DCG XL from {MODEL_REPO}...")
64
+
65
+ pipe = StableDiffusionXLPipeline.from_pretrained(
66
+ MODEL_REPO,
67
+ torch_dtype=torch.float16,
68
+ token=HF_TOKEN,
69
+ )
70
+ pipe.scheduler = DPMSolverMultistepScheduler.from_config(
71
+ pipe.scheduler.config,
72
+ use_karras_sigmas=True,
73
+ )
74
+ pipe.enable_attention_slicing()
75
+ print("Pipeline ready.")
76
+
77
  # โ”€โ”€ Style presets โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
78
  STYLES = {
79
+ "Auto": {"pos": "", "neg": ""},
80
+ "🎮 3DCG Render": {
81
+ "pos": "3DCG render, Pixar style, ray tracing, subsurface scattering, ambient occlusion, sharp edges, studio lighting, ",
82
+ "neg": "flat, 2D, anime flat colour, sketch, ",
83
  },
84
+ "⚔️ Fantasy": {
85
+ "pos": "fantasy character, epic armour, magical atmosphere, dramatic lighting, volumetric fog, concept art, artstation, ",
86
+ "neg": "modern, mundane, sci-fi, ",
87
  },
88
+ "🤖 Sci-Fi": {
89
+ "pos": "sci-fi character, futuristic suit, neon accents, holographic elements, dark background, cinematic, ",
90
+ "neg": "medieval, fantasy, nature, ",
91
  },
92
+ "🌸 Stylised": {
93
+ "pos": "stylised illustration, vibrant colours, soft cel shading, clean lineart, anime-adjacent, ",
94
+ "neg": "photorealistic, gritty, dark, ",
95
  },
96
+ "🎬 Cinematic": {
97
+ "pos": "cinematic portrait, dramatic rim lighting, shallow depth of field, film grain, color graded, ",
98
+ "neg": "flat, overexposed, sketch, ",
99
  },
100
+ "🏙️ Urban": {
101
+ "pos": "urban streetwear character, city background, neon lights, night scene, realistic clothing, ",
102
+ "neg": "fantasy, medieval, nature, ",
103
  },
104
  }
105
 
 
130
  seed = random.randint(0, 2**32 - 1)
131
  seed = int(seed)
132
 
133
+ # LLM expansion
134
  expanded = expand_prompt_llm(raw_prompt, style)
 
 
135
  style_data = STYLES.get(style, STYLES["Auto"])
136
  final_pos = IL_POS + style_data["pos"] + expanded
137
  final_neg = IL_NEG + style_data["neg"] + negative_prompt.strip()
138
 
 
139
  pipe.to("cuda")
140
 
141
+ # LoRA
142
  lora_loaded = False
143
  lora_data = LORAS.get(lora_name)
144
  if lora_data:
 
173
 
174
  pipe.to("cpu")
175
 
176
+ debug = f"**Expanded prompt:**\n\n{final_pos}" if show_expanded else ""
177
+ return result.images[0], seed, debug
 
 
178
 
179
  # โ”€โ”€ CSS โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
180
  css = """
 
354
  letter-spacing: 1px !important;
355
  }
356
 
 
357
  .debug-box {
358
  background: #080814;
359
  border: 1px solid #111122;
 
365
  font-family: monospace;
366
  word-break: break-word;
367
  margin-bottom: 8px;
368
+ min-height: 10px;
369
  }
370
 
371
  .gen-btn button {
 
389
  }
390
  .gen-btn button:active {
391
  transform: scale(0.98) !important;
392
+ box-shadow: 0 2px 12px #4a1aaa33 !important;
393
  }
394
 
395
  footer, .built-with { display: none !important; }
 
400
 
401
  gr.HTML("""
402
  <div class="topbar">
403
+ <span class="topbar-title">Nova 3DCG XL</span>
404
  <span class="gpu-pill">⚡ ZeroGPU</span>
405
  </div>
406
  """)
 
414
  elem_classes="seed-pill",
415
  )
416
 
417
+ gr.HTML('<div class="card"><div class="card-label">✦ Prompt — describe your character</div>')
418
  prompt = gr.Textbox(
419
  show_label=False,
420
+ placeholder="warrior woman in red armour, glowing sword, forest background...",
421
  lines=3,
422
  )
423
  gr.HTML('</div>')
 
438
  expanded_out = gr.Markdown(
439
  value="",
440
  elem_classes="debug-box",
 
441
  )
442
 
443
  with gr.Accordion("⚙️ Settings", open=False):
 
447
  label="Negative Prompt",
448
  value=(
449
  "worst quality, low quality, bad anatomy, bad hands, "
450
+ "extra limbs, missing limbs, watermark, signature, "
451
+ "blurry, deformed, ugly, text"
452
  ),
453
  lines=2,
454
  )
 
458
  height = gr.Slider(512, 1216, value=1216, step=64, label="Height")
459
 
460
  steps = gr.Slider(20, 60, value=30, step=1, label="Steps")
461
+ guidance = gr.Slider(1.0, 10.0, value=6.0, step=0.5, label="CFG Scale")
462
 
463
  with gr.Row():
464
  seed = gr.Number(
 
468
  randomize = gr.Checkbox(label="Random seed", value=True, scale=1)
469
 
470
  show_expanded = gr.Checkbox(
471
+ label="Show expanded prompt",
472
  value=True,
473
  )
474
 
475
  with gr.Accordion("🎨 LoRA", open=False):
476
  gr.HTML('<div style="height:6px"></div>')
477
+ lora_name = gr.Dropdown(choices=list(LORAS.keys()), value="None", label="LoRA")
478
+ lora_strength = gr.Slider(0.1, 1.0, value=0.7, step=0.05, label="Strength")
479
 
480
  generate_btn.click(
481
  fn=generate,