bmarci committed on
Commit
18907bb
·
1 Parent(s): 4d5f907

Revert "adjustable cfg, better examples"

Browse files
Files changed (1) hide show
  1. app.py +42 -130
app.py CHANGED
@@ -25,8 +25,6 @@ pipeline = NextStepPipeline(tokenizer=tokenizer, model=model).to(device=device,
25
  MAX_SEED = np.iinfo(np.int16).max
26
  DEFAULT_POSITIVE_PROMPT = None
27
  DEFAULT_NEGATIVE_PROMPT = None
28
- DEFAULT_CFG = 7.5
29
-
30
 
31
  def _ensure_pil(x):
32
  """Ensure returned image is a PIL.Image.Image."""
@@ -38,79 +36,46 @@ def _ensure_pil(x):
38
  if isinstance(x, np.ndarray):
39
  if x.dtype != np.uint8:
40
  x = (x * 255.0).clip(0, 255).astype(np.uint8)
41
- if x.ndim == 3 and x.shape[0] in (1, 3, 4): # CHW -> HWC
42
  x = np.moveaxis(x, 0, -1)
43
  return Image.fromarray(x)
44
  raise TypeError("Unsupported image type returned by pipeline.")
45
 
46
-
47
- def calculate_gpu_duration(width, height, num_inference_steps):
48
- """
49
- Calculate GPU duration based on image dimensions and inference steps.
50
-
51
- Base calculation:
52
- - Minimum: 60 seconds for smallest images
53
- - Scales with total pixels and number of steps
54
- - Maximum: 600 seconds for safety
55
- """
56
- # Total pixels (in millions)
57
- total_pixels = (width * height) / 1_000_000
58
-
59
- # Base duration: assume ~1 second per megapixel per step as baseline
60
- # Adjust the multiplier based on your model's actual performance
61
- base_duration = total_pixels * num_inference_steps * 0.5
62
-
63
- # Add overhead for model loading and post-processing
64
- overhead = 30
65
-
66
- # Calculate final duration with min/max bounds
67
- duration = int(base_duration + overhead)
68
- duration = max(60, min(duration, 600)) # Between 60 and 600 seconds
69
-
70
- return duration
71
-
72
-
73
  def infer(
74
- prompt=None,
75
- seed=0,
76
- width=512,
77
- height=512,
78
- num_inference_steps=28,
79
- cfg=DEFAULT_CFG,
80
- positive_prompt=DEFAULT_POSITIVE_PROMPT,
81
- negative_prompt=DEFAULT_NEGATIVE_PROMPT,
82
- progress=gr.Progress(track_tqdm=True),
83
  ):
84
- """Run inference at exactly (width, height) with dynamic GPU allocation."""
85
  if prompt in [None, ""]:
86
  gr.Warning("⚠️ Please enter a prompt!")
87
  return None
88
 
89
- # Calculate dynamic duration based on image size
90
- gpu_duration = calculate_gpu_duration(width, height, num_inference_steps)
91
-
92
- # Use context manager for dynamic GPU allocation
93
- with spaces.GPU(duration=gpu_duration):
94
- with autocast(device_type=("cuda" if device == "cuda" else "cpu"), dtype=torch.bfloat16):
95
- imgs = pipeline.generate_image(
96
- prompt,
97
- hw=(int(height), int(width)),
98
- num_images_per_caption=1,
99
- positive_prompt=positive_prompt,
100
- negative_prompt=negative_prompt,
101
- cfg=float(cfg),
102
- cfg_img=1.0,
103
- cfg_schedule="constant",
104
- use_norm=False,
105
- num_sampling_steps=int(num_inference_steps),
106
- timesteps_shift=1.0,
107
- seed=int(seed),
108
- progress=True,
109
- )
110
 
111
  return _ensure_pil(imgs[0]) # Return raw output exactly as generated
112
 
113
-
114
  css = """
115
  #col-container {
116
  margin: 0 auto;
@@ -120,7 +85,7 @@ css = """
120
 
121
  with gr.Blocks(css=css) as demo:
122
  with gr.Column(elem_id="col-container"):
123
- gr.Markdown("# NextStep-1-Large — Image generation")
124
 
125
  with gr.Row():
126
  prompt = gr.Text(
@@ -179,14 +144,6 @@ with gr.Blocks(css=css) as demo:
179
  step=64,
180
  value=512,
181
  )
182
- cfg = gr.Slider(
183
- label="CFG (guidance scale)",
184
- minimum=0.0,
185
- maximum=20.0,
186
- step=0.5,
187
- value=DEFAULT_CFG,
188
- info="Higher = closer to text, lower = more creative",
189
- )
190
 
191
  with gr.Row():
192
  result_1 = gr.Image(
@@ -197,66 +154,25 @@ with gr.Blocks(css=css) as demo:
197
  format="png",
198
  )
199
 
 
200
  examples = [
201
  [
202
- "Studio portrait of an elderly sailor with a weathered face, dramatic Rembrandt lighting, shallow depth of field",
203
- 101, 512, 512, 32, 7.5,
204
- "photorealistic, sharp eyes, detailed skin texture, soft rim light, 85mm lens",
205
- "over-smoothed skin, plastic look, extra limbs, watermark",
206
  ],
207
  [
208
- "Isometric cozy coffee shop interior with hanging plants and warm Edison bulbs",
209
- 202, 512, 384, 30, 8.5,
210
- "isometric view, clean lines, stylized, warm ambience, detailed furniture",
211
- "text, logo, watermark, perspective distortion",
212
  ],
213
  [
214
- "Ultra-wide desert canyon at golden hour with long shadows and dust in the air",
215
- 303, 512, 320, 28, 7.0,
216
- "cinematic, volumetric light, natural colors, high dynamic range",
217
- "over-saturated, haze artifacts, blown highlights",
218
- ],
219
- [
220
- "Cute red panda astronaut sticker, chibi style, white background",
221
- 404, 384, 384, 24, 9.0,
222
- "vector look, bold outlines, high contrast, die-cut silhouette",
223
- "background clutter, drop shadow, gradients, text",
224
- ],
225
- [
226
- "Product render of matte-black wireless headphones on reflective glass with soft studio lighting",
227
- 505, 512, 384, 28, 7.0,
228
- "clean backdrop, realistic reflections, subtle bloom, high detail",
229
- "noise, fingerprints, text, label",
230
- ],
231
- [
232
- "Graphic poster in Bauhaus style with geometric shapes and bold typography placeholders",
233
- 606, 512, 512, 22, 6.0,
234
- "flat colors, minimal palette, crisp edges, balanced composition",
235
- "photo realism, gradients, noisy texture",
236
- ],
237
- [
238
- "Oil painting of a stormy sea with a lighthouse, thick impasto brushwork",
239
- 707, 384, 512, 34, 7.0,
240
- "textured canvas, visible brush strokes, dramatic sky, moody lighting",
241
- "smooth digital look, airbrush, neon colors",
242
- ],
243
- [
244
- "Architectural concept art: glass pavilion in a pine forest at dawn, ground fog",
245
- 808, 512, 384, 30, 8.0,
246
- "physically-based rendering, soft fog, realistic materials, scale figures",
247
- "tilt, skew, warped geometry, chromatic aberration",
248
- ],
249
- [
250
- "Fantasy creature: bioluminescent jellyfish dragon swimming through a dark ocean trench",
251
- 909, 512, 512, 32, 8.5,
252
- "glowing tendrils, soft caustics, particles, high detail",
253
- "washed out, murky, low contrast, extra heads",
254
- ],
255
- [
256
- "Line art coloring page of a city skyline with hot air balloons",
257
- 111, 512, 512, 18, 5.5,
258
- "clean black outlines, uniform stroke weight, high contrast, no shading",
259
- "gray fill, gradients, cross-hatching, text",
260
  ],
261
  ]
262
 
@@ -268,18 +184,15 @@ with gr.Blocks(css=css) as demo:
268
  width,
269
  height,
270
  num_inference_steps,
271
- cfg,
272
  positive_prompt,
273
  negative_prompt,
274
  ],
275
  label="Click & Fill Examples (Exact Size)",
276
  )
277
 
278
-
279
  def show_result():
280
  return gr.update(visible=True)
281
 
282
-
283
  generation_event = gr.on(
284
  triggers=[run_button.click, prompt.submit],
285
  fn=infer,
@@ -289,7 +202,6 @@ with gr.Blocks(css=css) as demo:
289
  width,
290
  height,
291
  num_inference_steps,
292
- cfg,
293
  positive_prompt,
294
  negative_prompt,
295
  ],
@@ -299,4 +211,4 @@ with gr.Blocks(css=css) as demo:
299
  cancel_button.click(fn=None, inputs=None, outputs=None, cancels=[generation_event])
300
 
301
  if __name__ == "__main__":
302
- demo.launch()
 
25
  MAX_SEED = np.iinfo(np.int16).max
26
  DEFAULT_POSITIVE_PROMPT = None
27
  DEFAULT_NEGATIVE_PROMPT = None
 
 
28
 
29
  def _ensure_pil(x):
30
  """Ensure returned image is a PIL.Image.Image."""
 
36
  if isinstance(x, np.ndarray):
37
  if x.dtype != np.uint8:
38
  x = (x * 255.0).clip(0, 255).astype(np.uint8)
39
+ if x.ndim == 3 and x.shape[0] in (1,3,4): # CHW -> HWC
40
  x = np.moveaxis(x, 0, -1)
41
  return Image.fromarray(x)
42
  raise TypeError("Unsupported image type returned by pipeline.")
43
 
44
+ @spaces.GPU(duration=300)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  def infer(
46
+ prompt=None,
47
+ seed=0,
48
+ width=512,
49
+ height=512,
50
+ num_inference_steps=28,
51
+ positive_prompt=DEFAULT_POSITIVE_PROMPT,
52
+ negative_prompt=DEFAULT_NEGATIVE_PROMPT,
53
+ progress=gr.Progress(track_tqdm=True),
 
54
  ):
55
+ """Run inference at exactly (width, height)."""
56
  if prompt in [None, ""]:
57
  gr.Warning("⚠️ Please enter a prompt!")
58
  return None
59
 
60
+ with autocast(device_type=("cuda" if device == "cuda" else "cpu"), dtype=torch.bfloat16):
61
+ imgs = pipeline.generate_image(
62
+ prompt,
63
+ hw=(int(height), int(width)),
64
+ num_images_per_caption=1,
65
+ positive_prompt=positive_prompt,
66
+ negative_prompt=negative_prompt,
67
+ cfg=7.5,
68
+ cfg_img=1.0,
69
+ cfg_schedule="constant",
70
+ use_norm=False,
71
+ num_sampling_steps=int(num_inference_steps),
72
+ timesteps_shift=1.0,
73
+ seed=int(seed),
74
+ progress=True,
75
+ )
 
 
 
 
 
76
 
77
  return _ensure_pil(imgs[0]) # Return raw output exactly as generated
78
 
 
79
  css = """
80
  #col-container {
81
  margin: 0 auto;
 
85
 
86
  with gr.Blocks(css=css) as demo:
87
  with gr.Column(elem_id="col-container"):
88
+ gr.Markdown("# NextStep-1-Large — Exact Output Size")
89
 
90
  with gr.Row():
91
  prompt = gr.Text(
 
144
  step=64,
145
  value=512,
146
  )
 
 
 
 
 
 
 
 
147
 
148
  with gr.Row():
149
  result_1 = gr.Image(
 
154
  format="png",
155
  )
156
 
157
+ # Click & Fill Examples (all <=512px)
158
  examples = [
159
  [
160
+ "A cozy wooden cabin by a frozen lake, northern lights in the sky",
161
+ 123, 512, 512, 28,
162
+ "photorealistic, cinematic lighting, starry night, glowing reflections",
163
+ "low-res, distorted, extra objects"
164
  ],
165
  [
166
+ "Futuristic city skyline at sunset, flying cars, neon reflections",
167
+ 456, 512, 384, 30,
168
+ "detailed, vibrant, cinematic, sharp edges",
169
+ "washed out, cartoon, blurry"
170
  ],
171
  [
172
+ "Close-up of a rare orchid in a greenhouse with soft morning light",
173
+ 789, 384, 512, 32,
174
+ "macro lens effect, ultra-detailed petals, dew drops",
175
+ "grainy, noisy, oversaturated"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
176
  ],
177
  ]
178
 
 
184
  width,
185
  height,
186
  num_inference_steps,
 
187
  positive_prompt,
188
  negative_prompt,
189
  ],
190
  label="Click & Fill Examples (Exact Size)",
191
  )
192
 
 
193
  def show_result():
194
  return gr.update(visible=True)
195
 
 
196
  generation_event = gr.on(
197
  triggers=[run_button.click, prompt.submit],
198
  fn=infer,
 
202
  width,
203
  height,
204
  num_inference_steps,
 
205
  positive_prompt,
206
  negative_prompt,
207
  ],
 
211
  cancel_button.click(fn=None, inputs=None, outputs=None, cancels=[generation_event])
212
 
213
  if __name__ == "__main__":
214
+ demo.launch()