Spaces:

bmarci
/

NextStep-1-Large

Running on Zero

App Files Community

bmarci committed on Aug 23

Commit

3669017

1 Parent(s): 940ab95

correct sizing, and limit

Browse files

Files changed (1) hide show

app.py +33 -94

app.py CHANGED Viewed

@@ -1,6 +1,5 @@
 import gradio as gr
 import numpy as np
-import random
 import spaces
 from PIL import Image
 import torch
@@ -24,63 +23,23 @@ model = AutoModel.from_pretrained(
 pipeline = NextStepPipeline(tokenizer=tokenizer, model=model).to(device=device, dtype=torch.bfloat16)
 MAX_SEED = np.iinfo(np.int16).max
 DEFAULT_POSITIVE_PROMPT = None
 DEFAULT_NEGATIVE_PROMPT = None
 def _ensure_pil(x):
     if isinstance(x, Image.Image):
         return x
-    # try common conversions (numpy / torch -> PIL)
-    try:
-        import numpy as np
-        if hasattr(x, "detach"):
-            x = x.detach().float().clamp(0, 1).cpu().numpy()
-        if isinstance(x, np.ndarray):
-            if x.dtype != np.uint8:
-                x = (x * 255.0).clip(0, 255).astype(np.uint8)
-            if x.ndim == 3 and x.shape[0] in (1,3,4):  # CHW -> HWC
-                x = np.moveaxis(x, 0, -1)
-            return Image.fromarray(x)
-    except Exception:
-        pass
-    raise TypeError("Unsupported image type returned by pipeline; expected PIL or array/torch image.")
-def resize_to_target(img: Image.Image, tw: int, th: int, mode: str = "fit"):
-    """Return a PIL image of exactly (tw, th) using the selected mode."""
-    mode = (mode or "fit").lower()
-    # safety
-    tw = int(max(1, tw))
-    th = int(max(1, th))
-    if mode == "stretch":
-        return img.resize((tw, th), resample=Image.Resampling.LANCZOS)
-    iw, ih = img.size
-    if iw == 0 or ih == 0:
-        return img
-    src_ratio = iw / ih
-    tgt_ratio = tw / th
-    if mode == "fill":
-        # scale so that image fully covers target, then center-crop
-        scale = max(tw / iw, th / ih)
-        nw, nh = int(round(iw * scale)), int(round(ih * scale))
-        resized = img.resize((nw, nh), resample=Image.Resampling.LANCZOS)
-        left = (nw - tw) // 2
-        top = (nh - th) // 2
-        return resized.crop((left, top, left + tw, top + th))
-    else:
-        # "fit": letterbox to target
-        scale = min(tw / iw, th / ih)
-        nw, nh = int(round(iw * scale)), int(round(ih * scale))
-        resized = img.resize((nw, nh), resample=Image.Resampling.LANCZOS)
-        canvas = Image.new("RGB", (tw, th), (0, 0, 0))
-        left = (tw - nw) // 2
-        top = (th - nh) // 2
-        canvas.paste(resized, (left, top))
-        return canvas
 @spaces.GPU(duration=300)
 def infer(
@@ -91,14 +50,13 @@ def infer(
     num_inference_steps=28,
     positive_prompt=DEFAULT_POSITIVE_PROMPT,
     negative_prompt=DEFAULT_NEGATIVE_PROMPT,
-    resize_mode="fit (letterbox)",
     progress=gr.Progress(track_tqdm=True),
 ):
     if prompt in [None, ""]:
         gr.Warning("⚠️ Please enter a prompt!")
         return None
-    # Generate at (height, width). Some models may return bucketed sizes.
     with autocast(device_type=("cuda" if device == "cuda" else "cpu"), dtype=torch.bfloat16):
         imgs = pipeline.generate_image(
             prompt,
@@ -116,23 +74,18 @@ def infer(
             progress=True,
         )
-    img = _ensure_pil(imgs[0])
-    # Force output to exactly Width x Height based on user preference
-    mode_key = "fit" if "fit" in resize_mode else ("fill" if "fill" in resize_mode else "stretch")
-    out = resize_to_target(img, int(width), int(height), mode=mode_key)
-    return out
 css = """
 #col-container {
     margin: 0 auto;
-    max-width: 900px;
 }
 """
 with gr.Blocks(css=css) as demo:
     with gr.Column(elem_id="col-container"):
-        gr.Markdown("# NextStep-1-Large — Edit & Size-Adaptive Output")
         with gr.Row():
             prompt = gr.Text(
@@ -180,26 +133,19 @@ with gr.Blocks(css=css) as demo:
                     width = gr.Slider(
                         label="Width",
                         minimum=256,
-                        maximum=1536,
                         step=64,
-                        value=768,
                     )
                     height = gr.Slider(
                         label="Height",
                         minimum=256,
-                        maximum=1536,
                         step=64,
-                        value=1024,
                     )
-                resize_mode = gr.Radio(
-                    label="Resize mode (final output)",
-                    choices=["fit (letterbox)", "fill (center-crop)", "stretch"],
-                    value="fit (letterbox)",
-                )
         with gr.Row():
-            # Remove fixed height so the component can display any size; it will scale in the UI,
-            # but the returned image file is exactly width x height.
             result_1 = gr.Image(
                 label="Result",
                 show_label=True,
@@ -208,29 +154,25 @@ with gr.Blocks(css=css) as demo:
                 format="png",
             )
-        # --- Click & Fill Examples ---
         examples = [
-            # [prompt, seed, width, height, steps, positive, negative, resize_mode]
             [
-                "Sunrise over terraced rice fields, mist in the valley, lone farmer with conical hat",
-                101, 832, 1216, 28,
-                "soft god rays, crisp details, photorealistic, golden hour",
-                "blurry, over-saturated, artifacts",
-                "fit (letterbox)",
             ],
             [
-                "Glass lighthouse on a stormy cliff, waves crashing, bioluminescent algae trails",
-                777, 1024, 768, 32,
-                "cinematic lighting, long exposure water, detailed foam",
-                "cartoon, low-res, extra limbs",
-                "fill (center-crop)",
             ],
             [
-                "Ancient stone bridge in a mossy ravine, waterfalls and hanging lanterns at dusk",
-                3407, 1024, 1024, 30,
-                "volumetric fog, wet stone microtexture, realistic vegetation",
-                "banding, washed-out, text",
-                "stretch",
             ],
         ]
@@ -244,9 +186,8 @@ with gr.Blocks(css=css) as demo:
                 num_inference_steps,
                 positive_prompt,
                 negative_prompt,
-                resize_mode,
             ],
-            label="Click & Fill Examples",
         )
     def show_result():
@@ -263,7 +204,6 @@ with gr.Blocks(css=css) as demo:
             num_inference_steps,
             positive_prompt,
             negative_prompt,
-            resize_mode,
         ],
         outputs=[result_1],
     )
@@ -271,5 +211,4 @@ with gr.Blocks(css=css) as demo:
     cancel_button.click(fn=None, inputs=None, outputs=None, cancels=[generation_event])
 if __name__ == "__main__":
-    # Set share=True if you want a public link
     demo.launch()

 import gradio as gr
 import numpy as np
 import spaces
 from PIL import Image
 import torch
 pipeline = NextStepPipeline(tokenizer=tokenizer, model=model).to(device=device, dtype=torch.bfloat16)
 MAX_SEED = np.iinfo(np.int16).max
 DEFAULT_POSITIVE_PROMPT = None
 DEFAULT_NEGATIVE_PROMPT = None
 def _ensure_pil(x):
+    """Ensure returned image is a PIL.Image.Image."""
     if isinstance(x, Image.Image):
         return x
+    import numpy as np
+    if hasattr(x, "detach"):
+        x = x.detach().float().clamp(0, 1).cpu().numpy()
+    if isinstance(x, np.ndarray):
+        if x.dtype != np.uint8:
+            x = (x * 255.0).clip(0, 255).astype(np.uint8)
+        if x.ndim == 3 and x.shape[0] in (1,3,4):  # CHW -> HWC
+            x = np.moveaxis(x, 0, -1)
+        return Image.fromarray(x)
+    raise TypeError("Unsupported image type returned by pipeline.")
 @spaces.GPU(duration=300)
 def infer(
     num_inference_steps=28,
     positive_prompt=DEFAULT_POSITIVE_PROMPT,
     negative_prompt=DEFAULT_NEGATIVE_PROMPT,
     progress=gr.Progress(track_tqdm=True),
 ):
+    """Run inference at exactly (width, height)."""
     if prompt in [None, ""]:
         gr.Warning("⚠️ Please enter a prompt!")
         return None
     with autocast(device_type=("cuda" if device == "cuda" else "cpu"), dtype=torch.bfloat16):
         imgs = pipeline.generate_image(
             prompt,
             progress=True,
         )
+    return _ensure_pil(imgs[0])  # Return raw output exactly as generated
 css = """
 #col-container {
     margin: 0 auto;
+    max-width: 800px;
 }
 """
 with gr.Blocks(css=css) as demo:
     with gr.Column(elem_id="col-container"):
+        gr.Markdown("# NextStep-1-Large — Exact Output Size")
         with gr.Row():
             prompt = gr.Text(
                     width = gr.Slider(
                         label="Width",
                         minimum=256,
+                        maximum=512,
                         step=64,
+                        value=512,
                     )
                     height = gr.Slider(
                         label="Height",
                         minimum=256,
+                        maximum=512,
                         step=64,
+                        value=512,
                     )
         with gr.Row():
             result_1 = gr.Image(
                 label="Result",
                 show_label=True,
                 format="png",
             )
+        # Click & Fill Examples (all <=512px)
         examples = [
             [
+                "A cozy wooden cabin by a frozen lake, northern lights in the sky",
+                123, 512, 512, 28,
+                "photorealistic, cinematic lighting, starry night, glowing reflections",
+                "low-res, distorted, extra objects"
             ],
             [
+                "Futuristic city skyline at sunset, flying cars, neon reflections",
+                456, 512, 384, 30,
+                "detailed, vibrant, cinematic, sharp edges",
+                "washed out, cartoon, blurry"
             ],
             [
+                "Close-up of a rare orchid in a greenhouse with soft morning light",
+                789, 384, 512, 32,
+                "macro lens effect, ultra-detailed petals, dew drops",
+                "grainy, noisy, oversaturated"
             ],
         ]
                 num_inference_steps,
                 positive_prompt,
                 negative_prompt,
             ],
+            label="Click & Fill Examples (Exact Size)",
         )
     def show_result():
             num_inference_steps,
             positive_prompt,
             negative_prompt,
         ],
         outputs=[result_1],
     )
     cancel_button.click(fn=None, inputs=None, outputs=None, cancels=[generation_event])
 if __name__ == "__main__":
     demo.launch()