Upload folder using huggingface_hub
- meanflow/helper_inference.py +209 -0
- meanflow/notes.txt +15 -0
- meanflow/targets_naive.py +35 -0
meanflow/helper_inference.py
ADDED
@@ -0,0 +1,209 @@
import jax
import jax.experimental
import wandb
import jax.numpy as jnp
import numpy as np
import tqdm
import matplotlib.pyplot as plt
import os
from functools import partial
from absl import app, flags

flags.DEFINE_integer('inference_timesteps', 1, 'Number of timesteps for inference.')
flags.DEFINE_integer('inference_generations', 50000, 'Number of generations for inference.')
flags.DEFINE_float('inference_cfg_scale', 1.0, 'CFG scale for inference.')

def do_inference(
    FLAGS,
    train_state,
    step,
    dataset,
    dataset_valid,
    shard_data,
    vae_encode,
    vae_decode,
    update,
    get_fid_activations,
    imagenet_labels,
    visualize_labels,
    fid_from_stats,
    truth_fid_stats,
):
    with jax.spmd_mode('allow_all'):
        global_device_count = jax.device_count()
        key = jax.random.PRNGKey(42 + jax.process_index())
        batch_images, batch_labels = next(dataset)
        valid_images, valid_labels = next(dataset_valid)
        if FLAGS.model.use_stable_vae:
            batch_images = vae_encode(key, batch_images)
            valid_images = vae_encode(key, valid_images)
        batch_labels_sharded, valid_labels_sharded = shard_data(batch_labels, valid_labels)
        labels_uncond = shard_data(jnp.ones(batch_labels.shape, dtype=jnp.int32) * FLAGS.model['num_classes'])  # Null token.
        eps = jax.random.normal(key, batch_images.shape)

        def process_img(img):
            if FLAGS.model.use_stable_vae:
                img = vae_decode(img[None])[0]
            img = img * 0.5 + 0.5
            img = jnp.clip(img, 0, 1)
            img = np.array(img)
            return img

        @partial(jax.jit, static_argnums=(5,))
        def call_model(train_state, images, t, dt, labels, use_ema=True):
            if use_ema and FLAGS.model.use_ema:
                call_fn = train_state.call_model_ema
            else:
                call_fn = train_state.call_model
            output = call_fn(images, t, dt, labels, train=False)
            return output

        if FLAGS.mode == 'interpolate':
            seed = 5
            eps0 = jax.random.normal(jax.random.PRNGKey(seed), batch_images[0].shape)
            eps1 = jax.random.normal(jax.random.PRNGKey(seed + 1), batch_images[0].shape)
            labels = jnp.ones((FLAGS.batch_size,)).astype(jnp.int32) * 555
            i = jnp.linspace(0, 1, FLAGS.batch_size)
            i_neg = jnp.sqrt(1 - i**2)
            # Spherical interpolation between the two noise draws.
            x = eps0[None] * i_neg[:, None, None, None] + eps1[None] * i[:, None, None, None]
            t_vector = jnp.full((FLAGS.batch_size,), 0)
            dt_vector = jnp.zeros_like(t_vector)
            cfg_scale = FLAGS.inference_cfg_scale
            v = call_model(train_state, x, t_vector, dt_vector, labels)
            x = x + v * 1.0
            x = vae_decode(x)  # Image is in [-1, 1] space.
            x_render = np.array(jax.experimental.multihost_utils.process_allgather(x))
            os.makedirs(FLAGS.save_dir, exist_ok=True)
            np.save(FLAGS.save_dir + '/x_render.npy', x_render)
            breakpoint()

        denoise_timesteps = FLAGS.inference_timesteps
        num_generations = FLAGS.inference_generations
        cfg_scale = FLAGS.inference_cfg_scale
        x0 = []
        x1 = []
        lab = []
        x_render = []
        activations = []
        images_shape = batch_images.shape
        print(f"Calculating FID for CFG {cfg_scale} and denoise_timesteps {denoise_timesteps}")
        for fid_it in tqdm.tqdm(range(num_generations // FLAGS.batch_size)):
            key = jax.random.PRNGKey(42)
            key = jax.random.fold_in(key, fid_it)
            key = jax.random.fold_in(key, jax.process_index())
            eps_key, label_key = jax.random.split(key)
            x = jax.random.normal(eps_key, images_shape)
            labels = jax.random.randint(label_key, (images_shape[0],), 0, FLAGS.model.num_classes)
            x, labels = shard_data(x, labels)
            x0.append(np.array(jax.experimental.multihost_utils.process_allgather(x)))
            delta_t = 1.0 / denoise_timesteps
            sigmas = []
            for ti in range(denoise_timesteps + 1):
                t = ti / denoise_timesteps  # From x_0 (noise) to x_1 (data).
                sigmas.append(t)
            # This gives us n + 1 sigma values, since both endpoints are included.
            i = 0
            for ti in range(denoise_timesteps):
                t = ti / denoise_timesteps  # From x_0 (noise) to x_1 (data).
                meanflow = True  # Set to False to test regular flow sampling.
                if meanflow:
                    t = 1
                t_vector = jnp.full((images_shape[0],), t)
                if FLAGS.model.train_type == 'naive':
                    dt_flow = np.log2(FLAGS.model['denoise_timesteps']).astype(jnp.int32)
                    dt_base = jnp.ones(images_shape[0], dtype=jnp.int32) * dt_flow  # Smallest dt.
                else:  # Shortcut.
                    dt_flow = np.log2(denoise_timesteps).astype(jnp.int32)
                    dt_base = jnp.ones(images_shape[0], dtype=jnp.int32) * dt_flow
                # print(dt_base)
                if meanflow:
                    dt_base = dt_base * 0

                # dt_base = t
                # Need to make sure these look right: we want r = t for this
                # part, and t handled normally.

                t_vector, dt_base = shard_data(t_vector, dt_base)
                if cfg_scale == 1:
                    v = call_model(train_state, x, t_vector, dt_base, labels)
                elif cfg_scale == 0:
                    v = call_model(train_state, x, t_vector, dt_base, labels_uncond)
                else:
                    v_pred_uncond = call_model(train_state, x, t_vector, dt_base, labels_uncond)
                    v_pred_label = call_model(train_state, x, t_vector, dt_base, labels)
                    v = v_pred_uncond + cfg_scale * (v_pred_label - v_pred_uncond)

                if FLAGS.model.train_type == 'consistency':
                    eps = shard_data(jax.random.normal(jax.random.fold_in(eps_key, ti), images_shape))
                    x1pred = x + v * (1 - t)
                    x = x1pred * (t + delta_t) + eps * (1 - t - delta_t)
                elif True:  # TODO: verify this is the correct sampling rule for this model.
                    # x = x + v * delta_t  # Euler sampling.
                    x = x - v * delta_t
                elif False:

                    def get_ancestral_step(t0, t1):
                        sigma_up = None
                        return 1 / (1 + ((t0 ** 2 * (t1 - 1) ** 4) / ((t0 - 1) ** 2 * t1 ** 4)) ** 0.5), sigma_up

                    # def flow_sample_sde_3(model, x, ts):
                    #     for s, t in tqdm(zip(ts[:-1], ts[1:]), total=len(ts) - 1):
                    #         dx = model(x, s)
                    #         denoised = x + dx * (1 - s)
                    #         noise = torch.randn_like(x)
                    #         fac_1 = (s * (1 - t) ** 2) / ((1 - s) ** 2 * t)
                    #         fac_2 = (t ** 2 - 2 * s * t ** 2 + s ** 2 * (2 * t - 1)) / ((1 - s) ** 2 * t)
                    #         fac_3 = (1 - t) * (fac_2 / t) ** 0.5
                    #         x = fac_1 * x + fac_2 * denoised + fac_3 * noise
                    #     return x

                    # Our timesteps look like 0, 1/128, ...
                    sigma_down, sigma_up = get_ancestral_step(sigmas[i], sigmas[i + 1])
                    # Euler method.
                    dt = sigma_down - sigmas[i]
                    # Naive up.
                    sigma_up = sigmas[i + 1] - dt

                    x = x + v * dt
                    if sigmas[i + 1] != 1.0:
                        x = x + jax.random.normal(eps_key, images_shape) * sigma_up * v

                i += 1
            x1.append(np.array(jax.experimental.multihost_utils.process_allgather(x)))
            lab.append(np.array(jax.experimental.multihost_utils.process_allgather(labels)))
            if FLAGS.model.use_stable_vae:
                x = vae_decode(x)  # Image is in [-1, 1] space.
            if num_generations < 10000:
                # Save some number of x for rendering.
                x_render.append(np.array(jax.experimental.multihost_utils.process_allgather(x)))
            # TODO: check what shape x has here.
            x = jax.image.resize(x, (x.shape[0], 299, 299, 3), method='bilinear', antialias=False)
            x = jnp.clip(x, -1, 1)
            acts = get_fid_activations(x)[..., 0, 0, :]  # [devices, batch//devices, 2048]
            acts = jax.experimental.multihost_utils.process_allgather(acts)
            acts = np.array(acts)
            activations.append(acts)

        if jax.process_index() == 0:
            activations = np.concatenate(activations, axis=0)
            activations = activations.reshape((-1, activations.shape[-1]))
            mu1 = np.mean(activations, axis=0)
            sigma1 = np.cov(activations, rowvar=False)
            fid = fid_from_stats(mu1, sigma1, truth_fid_stats['mu'], truth_fid_stats['sigma'])
            print(f"FID is {fid}")
            print(f"FID is {fid}")
            print(f"FID is {fid}")

        if FLAGS.save_dir is not None:
            os.makedirs(FLAGS.save_dir, exist_ok=True)
            x_render = np.concatenate(x_render, axis=0)
            np.save(FLAGS.save_dir + '/x_render.npy', x_render)

        # x0 = np.concatenate(x0, axis=0)
        # x1 = np.concatenate(x1, axis=0)
        # lab = np.concatenate(lab, axis=0)
        # os.makedirs(FLAGS.save_dir, exist_ok=True)
        # np.save(FLAGS.save_dir + '/x0.npy', x0)
        # np.save(FLAGS.save_dir + '/x1.npy', x1)
        # np.save(FLAGS.save_dir + '/lab.npy', lab)
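With the default flags (`inference_timesteps = 1`) and the `meanflow` branch active, the sampling loop above collapses to a single model call at t = 1 with dt_base = 0, followed by one Euler step. Below is a minimal standalone sketch of that one-step sampler, assuming a generic `model_fn(x, t, dt, labels)` with the same calling convention as `call_model`; the name `model_fn` and the surrounding harness are illustrative, not part of this repo.

import jax
import jax.numpy as jnp

def one_step_sample(model_fn, key, shape, labels, labels_uncond, cfg_scale=1.0):
    x = jax.random.normal(key, shape)             # Pure noise.
    t = jnp.full((shape[0],), 1)                  # The meanflow branch pins t = 1.
    dt = jnp.zeros((shape[0],), dtype=jnp.int32)  # dt_base is zeroed for meanflow.
    if cfg_scale == 1.0:
        v = model_fn(x, t, dt, labels)
    else:
        # Classifier-free guidance: blend unconditional and conditional predictions.
        v_uncond = model_fn(x, t, dt, labels_uncond)
        v_cond = model_fn(x, t, dt, labels)
        v = v_uncond + cfg_scale * (v_cond - v_uncond)
    return x - v * 1.0                            # Single Euler step, delta_t = 1 / 1.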
meanflow/notes.txt
ADDED
@@ -0,0 +1,15 @@
Katherine's reverse time:

noise at 0, clean at 1

sample t >= r

z = (1 - r) * e + r * x,  v = x - e

jvp: tangents (v, 1, 0) at primals (z, r, t)
u_gt = v + (t - r) * stopgrad(jvp)

sample: z = z + (t - r) * model(z, r, t)
Although it's actually model(z, r, t - r)

Sampling is r = 0, t = 1
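A sketch of how the u_gt target described in these notes could be computed with jax.jvp, under the same reverse-time convention; the function and argument names are illustrative, and this is one reading of the notes rather than code from the repo. Since z = (1 - r) * e + r * x gives dz/dr = x - e = v, the tangents (v, 1, 0) in the notes correspond to differentiating along the flow in r.

import jax
import jax.numpy as jnp

def meanflow_target(u, e, x, r, t):
    # Reverse-time convention: noise at 0, clean at 1.
    z = (1 - r) * e + r * x
    v = x - e
    # d/dr of u(z, r, t) along the flow: tangents (v, 1, 0) at primals (z, r, t).
    _, du_dr = jax.jvp(u, (z, r, t), (v, jnp.ones_like(r), jnp.zeros_like(t)))
    # u_gt = v + (t - r) * stopgrad(jvp), per the notes.
    return v + (t - r) * jax.lax.stop_gradient(du_dr)

With r = 0 and t = 1, one sampling step is z + (t - r) * model(z, r, t), i.e. a single call at (z, 0, 1), with the model actually conditioned on t - r as the last note points out.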
meanflow/targets_naive.py
ADDED
@@ -0,0 +1,35 @@
import jax
import jax.numpy as jnp
import numpy as np

def get_targets(FLAGS, key, train_state, images, labels, force_t=-1, force_dt=-1):
    label_key, time_key, noise_key = jax.random.split(key, 3)
    info = {}

    # Randomly replace labels with the null class for classifier-free guidance.
    labels_dropout = jax.random.bernoulli(label_key, FLAGS.model['class_dropout_prob'], (labels.shape[0],))
    labels_dropped = jnp.where(labels_dropout, FLAGS.model['num_classes'], labels)
    info['dropped_ratio'] = jnp.mean(labels_dropped == FLAGS.model['num_classes'])

    # Sample t.
    t = jax.random.randint(time_key, (images.shape[0],), minval=0, maxval=FLAGS.model['denoise_timesteps']).astype(jnp.float32)
    t /= FLAGS.model['denoise_timesteps']
    force_t_vec = jnp.ones(images.shape[0], dtype=jnp.float32) * force_t
    t = jnp.where(force_t_vec != -1, force_t_vec, t)  # If force_t is not -1, then use force_t.
    t_full = t[:, None, None, None]  # [batch, 1, 1, 1]

    # Sample flow pairs x_t, v_t.
    if 'latent' in FLAGS.dataset_name:
        # Latent datasets pack the (x_0, x_1) pair along the channel axis.
        x_0 = images[..., :images.shape[-1] // 2]
        x_1 = images[..., images.shape[-1] // 2:]
        x_t = (1 - (1 - 1e-5) * t_full) * x_0 + t_full * x_1
        v_t = x_1 - (1 - 1e-5) * x_0
    else:
        x_1 = images
        x_0 = jax.random.normal(noise_key, images.shape)
        x_t = (1 - (1 - 1e-5) * t_full) * x_0 + t_full * x_1
        v_t = x_1 - (1 - 1e-5) * x_0

    dt_flow = np.log2(FLAGS.model['denoise_timesteps']).astype(jnp.int32)
    dt_base = jnp.ones(images.shape[0], dtype=jnp.int32) * dt_flow

    return x_t, v_t, t, dt_base, labels_dropped, info
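For concreteness, a hypothetical call site for `get_targets`; the stand-in `FLAGS` object, dataset name, and toy shapes below are assumptions for illustration only.

import jax
import jax.numpy as jnp
from types import SimpleNamespace

# Stand-in FLAGS exposing only the fields get_targets reads (values illustrative).
FLAGS = SimpleNamespace(
    dataset_name='imagenet256',
    model={'class_dropout_prob': 0.1, 'num_classes': 1000, 'denoise_timesteps': 128},
)

key = jax.random.PRNGKey(0)
images = jax.random.normal(key, (8, 32, 32, 4))  # e.g. a batch of VAE latents.
labels = jnp.zeros((8,), dtype=jnp.int32)

# train_state is not used inside this target function, so None is fine here.
x_t, v_t, t, dt_base, labels_dropped, info = get_targets(
    FLAGS, key, train_state=None, images=images, labels=labels)
# x_t: interpolant between noise x_0 and data x_1; v_t: velocity target;
# t: per-example times in [0, 1); dt_base: log2(denoise_timesteps) per example.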