Spaces: Running on Zero
Commit · 508e6a7
1 Parent(s): ddc7f94
add inference
Browse files
app.py CHANGED
@@ -34,58 +34,112 @@ from customed_unipc_scheduler import CustomedUniPCMultistepScheduler

precision_scope = autocast

-def
-[removed lines 38-64: content not preserved in this view]
-# Adapted from pipelines.StableDiffusionPipeline.encode_prompt
-def encode_prompt(prompt_batch, text_encoder, tokenizer, proportion_empty_prompts, is_train=True):
-    captions = []
-    for caption in prompt_batch:
-        if random.random() < proportion_empty_prompts:
-            captions.append("")
-        elif isinstance(caption, str):
-            captions.append(caption)
-        elif isinstance(caption, (list, np.ndarray)):
-            # take a random caption if there are multiple
-            captions.append(random.choice(caption) if is_train else caption[0])
-
-    with torch.no_grad():
-        text_inputs = tokenizer(
-            captions,
-            padding="max_length",
-            max_length=tokenizer.model_max_length,
-            truncation=True,
-            return_tensors="pt",
-        )
-
-

-    return prompt_embeds

def chunk(it, size):
    it = iter(it)
@@ -95,67 +149,6 @@ def convert_caption_json_to_str(json):
    caption = json["caption"]
    return caption

-def prepare_sdxl_pipeline_step_parameter(pipe, prompts, need_cfg, device, negative_prompts, W = 1024, H = 1024):
-    (
-        prompt_embeds,
-        negative_prompt_embeds,
-        pooled_prompt_embeds,
-        negative_pooled_prompt_embeds,
-    ) = pipe.encode_prompt(
-        prompt=prompts,
-        negative_prompt=negative_prompts,
-        device=device,
-        do_classifier_free_guidance=need_cfg,
-    )
-    # timesteps = pipe.scheduler.timesteps
-
-    prompt_embeds = prompt_embeds.to(device)
-    add_text_embeds = pooled_prompt_embeds.to(device)
-    original_size = (W, H)
-    crops_coords_top_left = (0, 0)
-    target_size = (W, H)
-    text_encoder_projection_dim = None
-    add_time_ids = list(original_size + crops_coords_top_left + target_size)
-    if pipe.text_encoder_2 is None:
-        text_encoder_projection_dim = int(pooled_prompt_embeds.shape[-1])
-    else:
-        text_encoder_projection_dim = pipe.text_encoder_2.config.projection_dim
-    passed_add_embed_dim = (
-        pipe.unet.config.addition_time_embed_dim * len(add_time_ids) + text_encoder_projection_dim
-    )
-    expected_add_embed_dim = pipe.unet.add_embedding.linear_1.in_features
-    if expected_add_embed_dim != passed_add_embed_dim:
-        raise ValueError(
-            f"Model expects an added time embedding vector of length {expected_add_embed_dim}, but a vector of {passed_add_embed_dim} was created. The model has an incorrect config. Please check `unet.config.time_embedding_type` and `text_encoder_2.config.projection_dim`."
-        )
-    add_time_ids = torch.tensor([add_time_ids], dtype=prompt_embeds.dtype)
-    add_time_ids = add_time_ids.to(device)
-    negative_add_time_ids = add_time_ids
-
-    if need_cfg:
-        prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds], dim=0)
-        add_text_embeds = torch.cat([negative_pooled_prompt_embeds, add_text_embeds], dim=0)
-        add_time_ids = torch.cat([negative_add_time_ids, add_time_ids], dim=0)
-    ret_dict = {
-        "text_embeds": add_text_embeds,
-        "time_ids": add_time_ids
-    }
-    return prompt_embeds, ret_dict
-
-
-def model_closure(pipe):
-    def model_fn(x, t, c):
-        prompt = c[0]
-        cond_kwargs = c[1] if len(c) > 1 else None
-        # prompt_embeds, cond_kwargs = prepare_sdxl_pipeline_step_parameter(pipe=pipe, prompts=prompt, need_cfg=True, device=pipe.device, negative_prompts=negative_prompt)
-        # prompt_embeds, cond_kwargs = c
-        return pipe.unet(x
-                         , t
-                         , encoder_hidden_states=prompt.to(device=x.device, dtype=x.dtype)
-                         , added_cond_kwargs=cond_kwargs).sample
-
-    return model_fn
-

torch_dtype = torch.float16
repo_id = "madebyollin/sdxl-vae-fp16-fix"  # e.g., "distilbert/distilgpt2"
@@ -210,12 +203,12 @@ def generate_image_with_steps(prompt, negative_prompt, seed, width, height, guid
    negative_prompts = 1 * [negative_prompts]

    prompt_embeds, cond_kwargs = prepare_sdxl_pipeline_step_parameter(pipe
-    [removed lines 213-218: content not preserved in this view]
    noise_pred = pipe.unet(latent_model_input
                           , t
                           , encoder_hidden_states=prompt_embeds.to(device=latents.device, dtype=latents.dtype)

precision_scope = autocast

+def extract_into_tensor(a, t, x_shape):
+    b, *_ = t.shape
+    out = a.gather(-1, t)
+    return out.reshape(b, *((1,) * (len(x_shape) - 1)))
+
+
+def append_zero(x):
+    return torch.cat([x, x.new_zeros([1])])
+
+def prepare_sdxl_pipeline_step_parameter( pipe: StableDiffusionXLPipeline
+                                        , prompts
+                                        , need_cfg
+                                        , device
+                                        , negative_prompt = None
+                                        , W = 1024
+                                        , H = 1024):  # need to correct the format
+    (
+        prompt_embeds,
+        negative_prompt_embeds,
+        pooled_prompt_embeds,
+        negative_pooled_prompt_embeds,
+    ) = pipe.encode_prompt(
+        prompt=prompts,
+        negative_prompt=negative_prompt,
+        device=device,
+        do_classifier_free_guidance=need_cfg,
+    )
+    # timesteps = pipe.scheduler.timesteps
+
+    prompt_embeds = prompt_embeds.to(device)
+    add_text_embeds = pooled_prompt_embeds.to(device)
+    original_size = (W, H)
+    crops_coords_top_left = (0, 0)
+    target_size = (W, H)
+    text_encoder_projection_dim = None
+    add_time_ids = list(original_size + crops_coords_top_left + target_size)
+    if pipe.text_encoder_2 is None:
+        text_encoder_projection_dim = int(pooled_prompt_embeds.shape[-1])
+    else:
+        text_encoder_projection_dim = pipe.text_encoder_2.config.projection_dim
+    passed_add_embed_dim = (
+        pipe.unet.config.addition_time_embed_dim * len(add_time_ids) + text_encoder_projection_dim
+    )
+    expected_add_embed_dim = pipe.unet.add_embedding.linear_1.in_features
+    if expected_add_embed_dim != passed_add_embed_dim:
+        raise ValueError(
+            f"Model expects an added time embedding vector of length {expected_add_embed_dim}, but a vector of {passed_add_embed_dim} was created. The model has an incorrect config. Please check `unet.config.time_embedding_type` and `text_encoder_2.config.projection_dim`."
+        )
+    add_time_ids = torch.tensor([add_time_ids], dtype=prompt_embeds.dtype)
+    add_time_ids = add_time_ids.to(device)
+    negative_add_time_ids = add_time_ids
+
+    if need_cfg:
+        prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds], dim=0)
+        add_text_embeds = torch.cat([negative_pooled_prompt_embeds, add_text_embeds], dim=0)
+        add_time_ids = torch.cat([negative_add_time_ids, add_time_ids], dim=0)
+    ret_dict = {
+        "text_embeds": add_text_embeds,
+        "time_ids": add_time_ids
+    }
+    return prompt_embeds, ret_dict
+
+
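For orientation, the six add_time_ids built above are the SDXL micro-conditioning inputs (original size, crop offset, target size), and the dimension check can be verified by hand. The numbers below are illustrative only and assume the stock SDXL-base configuration (addition_time_embed_dim = 256, text_encoder_2 projection_dim = 1280), which this file does not state:

W = H = 1024
add_time_ids = [W, H, 0, 0, W, H]                        # original_size + crops_coords_top_left + target_size
passed_add_embed_dim = 256 * len(add_time_ids) + 1280    # 256 * 6 + 1280 = 2816
# For that config, pipe.unet.add_embedding.linear_1.in_features is also 2816,
# so the ValueError branch above is not triggered.

With need_cfg=True, the returned prompt_embeds, text_embeds and time_ids are each concatenated as [negative, positive] along the batch dimension, so downstream code is expected to run the UNet on a correspondingly duplicated latent batch.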
+# New helper to load a list-of-dicts preference JSON
+# JSON schema: [ { 'human_preference': [int], 'prompt': str, 'file_path': [str] }, ... ]
+def load_preference_json(json_path: str) -> list[dict]:
+    """Load records from a JSON file formatted as a list of preference dicts."""
+    with open(json_path, 'r') as f:
+        data = json.load(f)
+    return data
+
+# New helper to extract just the prompts from the preference JSON
+# Returns a flat list of all 'prompt' values
+
+def extract_prompts_from_pref_json(json_path: str) -> list[str]:
+    """Load a JSON of preference records and return only the prompts."""
+    records = load_preference_json(json_path)
+    return [rec['prompt'] for rec in records]
+
+# Example usage:
+# prompts = extract_prompts_from_pref_json("path/to/preference.json")
+# print(prompts)
+
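As a quick, self-contained illustration of the preference-file schema documented above (the records and the temporary file are hypothetical, not taken from this repo):

import json
import tempfile

records = [  # hypothetical records following the documented schema
    {"human_preference": [1, 0], "prompt": "a photo of a corgi", "file_path": ["a.png", "b.png"]},
    {"human_preference": [0, 1], "prompt": "a watercolor city at dusk", "file_path": ["c.png", "d.png"]},
]
with tempfile.NamedTemporaryFile("w", suffix=".json", delete=False) as f:
    json.dump(records, f)

print(extract_prompts_from_pref_json(f.name))
# ['a photo of a corgi', 'a watercolor city at dusk']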
+def get_sigmas_karras(n, sigma_min, sigma_max, rho=7., device='cpu', need_append_zero=True):
+    """Constructs the noise schedule of Karras et al. (2022)."""
+    ramp = torch.linspace(0, 1, n)
+    min_inv_rho = sigma_min ** (1 / rho)
+    max_inv_rho = sigma_max ** (1 / rho)
+    sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho
+    return append_zero(sigmas).to(device) if need_append_zero else sigmas.to(device)
+
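To make the shape of this schedule concrete, a minimal endpoint check follows; the sigma range is illustrative, not a value used by this app. Because the interpolation happens in sigma**(1/rho) space with rho=7, the steps cluster near sigma_min.

sigmas = get_sigmas_karras(n=10, sigma_min=0.03, sigma_max=14.6)
print(sigmas.shape)       # torch.Size([11]): n values plus the appended terminal zero
print(float(sigmas[0]))   # ~14.6, the schedule starts at sigma_max
print(float(sigmas[-2]))  # ~0.03, and decreases to sigma_min
print(float(sigmas[-1]))  # 0.0, added by append_zero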
+def extract_into_tensor(a, t, x_shape):
+    b, *_ = t.shape
+    out = a.gather(-1, t)
+    return out.reshape(b, *((1,) * (len(x_shape) - 1)))
+
+def append_zero(x):
+    return torch.cat([x, x.new_zeros([1])])
+
+def append_dims(x, target_dims):
+    """Appends dimensions to the end of a tensor until it has target_dims dimensions."""
+    dims_to_append = target_dims - x.ndim
+    if dims_to_append < 0:
+        raise ValueError(f'input has {x.ndim} dims but target_dims is {target_dims}, which is less')
+    return x[(...,) + (None,) * dims_to_append]


def chunk(it, size):
    it = iter(it)
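The small tensor helpers added above (extract_into_tensor, which appears twice in this revision, and append_dims) are the usual utilities for broadcasting a per-sample schedule value over a latent batch. A short illustration with made-up shapes:

import torch

schedule = torch.linspace(1.0, 0.1, steps=1000)  # a 1-D per-timestep schedule (made-up values)
t = torch.tensor([10, 500])                      # one timestep index per sample
latents = torch.randn(2, 4, 64, 64)              # a (B, C, H, W) latent batch

coef = extract_into_tensor(schedule, t, latents.shape)
print(coef.shape)                                   # torch.Size([2, 1, 1, 1]), ready to scale latents
print(append_dims(t.float(), latents.ndim).shape)   # torch.Size([2, 1, 1, 1]) via append_dims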
    caption = json["caption"]
    return caption


torch_dtype = torch.float16
repo_id = "madebyollin/sdxl-vae-fp16-fix"  # e.g., "distilbert/distilgpt2"
    negative_prompts = 1 * [negative_prompts]

    prompt_embeds, cond_kwargs = prepare_sdxl_pipeline_step_parameter(pipe
+                                                                     , prompts
+                                                                     , need_cfg=True
+                                                                     , device=pipe.device
+                                                                     , negative_prompt=negative_prompts
+                                                                     , W=width
+                                                                     , H=height)
    noise_pred = pipe.unet(latent_model_input
                           , t
                           , encoder_hidden_states=prompt_embeds.to(device=latents.device, dtype=latents.dtype)
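The hunk ends mid-call, so the classifier-free-guidance combination that usually follows the UNet call is not visible in this diff. Purely as a sketch of the conventional pattern, with the duplication of the latents, the guidance_scale name, and the chunk/guidance step being assumptions rather than content of this commit:

# Conventional CFG step, for reference only; not part of the visible diff.
latent_model_input = torch.cat([latents] * 2)  # duplicate latents to match the [negative, positive] embeddings
noise_pred = pipe.unet(latent_model_input,
                       t,
                       encoder_hidden_states=prompt_embeds.to(device=latents.device, dtype=latents.dtype),
                       added_cond_kwargs=cond_kwargs).sample
noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)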