Upload folder using huggingface_hub
Browse files- f16c16/all_stats.py +429 -0
- f16c16/decode_only.py +108 -0
- f16c16/encode_latents.py +338 -0
- f16c16/eval_fid.py +214 -0
- f16c16/evaluator.py +654 -0
- f16c16/graph-data.py +169 -0
- f16c16/kl_test.py +31 -0
- f16c16/latent_distances.py +293 -0
- f16c16/make_samples.py +205 -0
- f16c16/models/__pycache__/discriminator.cpython-310.pyc +0 -0
- f16c16/models/__pycache__/discriminator.cpython-312.pyc +0 -0
- f16c16/models/__pycache__/vqvae.cpython-310.pyc +0 -0
- f16c16/models/__pycache__/vqvae.cpython-312.pyc +0 -0
- f16c16/models/back_model.py +343 -0
- f16c16/models/discriminator.py +123 -0
- f16c16/models/vqvae.py +527 -0
- f16c16/ppl_images.py +255 -0
- f16c16/ppl_latents.py +307 -0
- f16c16/ppl_latents2.py +283 -0
- f16c16/stats.py +362 -0
- f16c16/train.py +676 -0
f16c16/all_stats.py
ADDED
|
@@ -0,0 +1,429 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
try:  # For debugging
    # Optional developer-machine hook; localutils is not a project dependency.
    from localutils.debugger import enable_debug
    enable_debug()
except ImportError:
    # Run normally when the local debugging helper is absent.
    pass
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
#import jax
|
| 9 |
+
#jax.config.update('jax_platform_name', 'cpu')
|
| 10 |
+
import os
|
| 11 |
+
#Apparently we've always been running this code on cpu.
|
| 12 |
+
|
| 13 |
+
# os.environ["JAX_PLATFORMS"] = 'cpu'
|
| 14 |
+
|
| 15 |
+
import jax
|
| 16 |
+
import lpips
|
| 17 |
+
|
| 18 |
+
# LPIPS perceptual metric (AlexNet backbone), created once at import time and
# moved to the default CUDA device; used by operations() below.
loss_fn_alex = lpips.LPIPS(net='alex') # best forward scores
loss_fn_alex = loss_fn_alex.cuda()
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
from dadapy.data import Data
|
| 23 |
+
|
| 24 |
+
import numpy as np
|
| 25 |
+
import flax.linen as nn
|
| 26 |
+
import jax.numpy as jnp
|
| 27 |
+
from absl import app, flags
|
| 28 |
+
from functools import partial
|
| 29 |
+
import numpy as np
|
| 30 |
+
import tqdm
|
| 31 |
+
import flax
|
| 32 |
+
import optax
|
| 33 |
+
import wandb
|
| 34 |
+
from ml_collections import config_flags
|
| 35 |
+
#import elements
|
| 36 |
+
import ml_collections
|
| 37 |
+
import tensorflow_datasets as tfds
|
| 38 |
+
import tensorflow as tf
|
| 39 |
+
tf.config.set_visible_devices([], "GPU")
|
| 40 |
+
tf.config.set_visible_devices([], "TPU")
|
| 41 |
+
import matplotlib.pyplot as plt
|
| 42 |
+
from typing import Any
|
| 43 |
+
|
| 44 |
+
from utils.train_state import TrainState, target_update
|
| 45 |
+
from utils.checkpoint import Checkpoint
|
| 46 |
+
from utils.fid import get_fid_network, fid_from_stats
|
| 47 |
+
|
| 48 |
+
from train import VQGANModel
|
| 49 |
+
from models.vqvae import VQVAE
|
| 50 |
+
from models.discriminator import Discriminator
|
| 51 |
+
|
| 52 |
+
from PIL import Image
|
| 53 |
+
import torch
|
| 54 |
+
|
| 55 |
+
# train.py (imported below for VQGANModel) already defines these flags, so
# remove them before re-defining with evaluation-specific defaults.
delattr(flags.FLAGS, 'dataset_name')
delattr(flags.FLAGS, 'load_dir')
delattr(flags.FLAGS, 'batch_size')

FLAGS = flags.FLAGS
flags.DEFINE_string('dataset_name', 'imagenet256', 'Environment name.')
flags.DEFINE_string('load_dir', "/home/dkaplan/Downloads/Models/checkpoint(1).tmp", 'Load dir (if not None, load params from here).')


flags.DEFINE_integer('batch_size', 2, 'Total Batch size.')
# Flags are inherited from train.py, so pass your model parameters again here to evaluate.
|
| 66 |
+
|
| 67 |
+
import gc
|
| 68 |
+
|
| 69 |
+
from scipy.spatial.distance import cdist
|
| 70 |
+
#
|
| 71 |
+
def relative(images, latents):
    """Measure how well the latent map preserves relative pairwise distances.

    For every distinct pair (i, j) this computes the ratio
    C_ij = d(image_i, image_j) / d(latent_i, latent_j) under the Euclidean
    metric and reports the mean and standard deviation of C. A near-constant
    C (small std) means the encoder acts like a similarity transform on
    pairwise structure ("relativemetry" in the notes below).

    Args:
        images: array of shape (N, ...); flattened to (N, D) internally.
        latents: array of shape (N, ...); flattened to (N, D') internally.

    Returns:
        (mean_c, std_c): mean and standard deviation of the distance ratios
        over all off-diagonal pairs with nonzero latent distance.
    """
    images = images.reshape(images.shape[0], -1)
    latents = latents.reshape(latents.shape[0], -1)

    image_distances = cdist(images, images, metric='euclidean')
    latent_distances = cdist(latents, latents, metric='euclidean')

    # BUG FIX: the previous elementwise division of the full matrices divided
    # 0/0 on the diagonal (self-distances), producing NaNs that made
    # np.mean(c) and np.std(c) NaN. Use each unordered pair exactly once
    # (strict upper triangle) and skip pairs with zero latent distance.
    upper = np.triu_indices(images.shape[0], k=1)
    numerators = image_distances[upper]
    denominators = latent_distances[upper]
    valid = denominators > 0  # guard against duplicate latents
    c = numerators[valid] / denominators[valid]

    mean_c = np.mean(c)
    std_c = np.std(c)
    print("mean C", mean_c)
    print("C std", std_c)
    return mean_c, std_c
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
def operations(reconstructed_images, decoded, epsilon=1e-4):
    """Compute a PPL-style perceptual (LPIPS) distance between two batches.

    Args:
        reconstructed_images: JAX array of images in [0, 1]; shape is
            (1, B, H, W, C) with the leading singleton squeezed away —
            assumes NHWC layout, TODO confirm against caller.
        decoded: JAX array in [0, 1], same shape as `reconstructed_images`.
        epsilon: latent perturbation step size used to produce the two
            batches; the LPIPS distance is normalized by epsilon**2
            (perceptual-path-length convention). Defaults to 1e-4, the
            value that was previously hard-coded.

    Returns:
        Scalar torch tensor on CPU: mean LPIPS distance divided by epsilon**2.
    """
    # LPIPS expects inputs in [-1, 1]; the data arrives in [0, 1].
    reconstructed_images = reconstructed_images * 2 - 1
    decoded = decoded * 2 - 1

    # Hand the arrays from JAX to torch via DLPack (no host copy).
    reconstructed_images = jax.dlpack.to_dlpack(reconstructed_images)
    reconstructed_images = torch.utils.dlpack.from_dlpack(reconstructed_images)

    decoded = jax.dlpack.to_dlpack(decoded)
    decoded = torch.utils.dlpack.from_dlpack(decoded)

    # Drop the leading device axis: (1, B, H, W, C) -> (B, H, W, C).
    reconstructed_images = reconstructed_images.squeeze()
    decoded = decoded.squeeze()

    # NHWC -> NCHW, the layout the LPIPS network expects.
    reconstructed_images = reconstructed_images.permute(0, 3, 1, 2)
    decoded = decoded.permute(0, 3, 1, 2)

    # loss_fn_alex is the module-level LPIPS(AlexNet) model on CUDA.
    lpips_loss = loss_fn_alex(reconstructed_images, decoded)
    lpips_cpu = lpips_loss.detach().cpu().squeeze().mean()
    # PPL normalization: perceptual distance per unit latent step.
    lpips_cpu = lpips_cpu / (epsilon ** 2)

    return lpips_cpu
|
| 117 |
+
|
| 118 |
+
def main(_):
    """Evaluation driver: loads a trained VQGAN/VQVAE checkpoint and computes
    a battery of latent-space statistics over the ImageNet validation split —
    PPL-style LPIPS scores, per-channel latent mean/std, noisy-latent stats,
    and an intrinsic-dimension estimate (dadapy 2NN).  Results are printed;
    previously observed runs are recorded in the string literals at the end.
    """
    device_count = len(jax.local_devices())
    global_device_count = jax.device_count()
    # Per-process batch; assumes batch_size divides evenly across processes.
    local_batch_size = FLAGS.batch_size // (global_device_count // device_count)

    def get_dataset(is_train):
        # Build an iterator of float32 image batches in [0, 1].
        if 'imagenet' in FLAGS.dataset_name:
            def deserialization_fn(data):
                image = data['image']
                # Center-crop to a square, then resize to the target size.
                min_side = tf.minimum(tf.shape(image)[0], tf.shape(image)[1])
                image = tf.image.resize_with_crop_or_pad(image, min_side, min_side)
                if 'imagenet256' in FLAGS.dataset_name:
                    image = tf.image.resize(image, (256, 256))
                elif 'imagenet128' in FLAGS.dataset_name:
                    image = tf.image.resize(image, (128, 128))
                else:
                    raise ValueError(f"Unknown dataset {FLAGS.dataset_name}")
                if is_train:
                    image = tf.image.random_flip_left_right(image)
                image = tf.cast(image, tf.float32) / 255.0
                return image

            split = tfds.split_for_jax_process('train' if is_train else 'validation', drop_remainder=True)
            dataset = tfds.load('imagenet2012', data_dir="/data/inet", split=split)
            dataset = dataset.map(deserialization_fn, num_parallel_calls=tf.data.AUTOTUNE)
            dataset = dataset.shuffle(10000, seed=42, reshuffle_each_iteration=True)
            dataset = dataset.batch(local_batch_size)
            dataset = dataset.prefetch(tf.data.AUTOTUNE)
            dataset = tfds.as_numpy(dataset)
            dataset = iter(dataset)
            return dataset
        else:
            raise ValueError(f"Unknown dataset {FLAGS.dataset_name}")

    dataset = get_dataset(is_train=True)
    dataset_valid = get_dataset(is_train=False)

    # One training example, used only to infer image shape for model init.
    example_obs = next(dataset)[:1]


    rng = jax.random.PRNGKey(FLAGS.seed)
    rng, param_key = jax.random.split(rng)
    print("Total devices", jax.local_devices()[0])


    ###################################
    # Creating Model and put on devices.
    ###################################
    FLAGS.model.image_channels = example_obs.shape[-1]
    FLAGS.model.image_size = example_obs.shape[1]
    vqvae_def = VQVAE(FLAGS.model, train=True)
    vqvae_params = vqvae_def.init({'params': param_key, 'noise': param_key}, example_obs)['params']
    # tx = optax.adam(learning_rate=FLAGS.model['lr'], b1=FLAGS.model['beta1'], b2=FLAGS.model['beta2'])
    vqvae_ts = TrainState.create(vqvae_def, vqvae_params)#, tx=tx) #Turning off tx because we don't need it...
    vqvae_def_eps = VQVAE(FLAGS.model, train=False)
    vqvae_eps_ts = TrainState.create(vqvae_def_eps, vqvae_params)
    print("Total num of VQVAE parameters:", sum(x.size for x in jax.tree_util.tree_leaves(vqvae_params)))

    discriminator_def = Discriminator(FLAGS.model)
    discriminator_params = discriminator_def.init(param_key, example_obs)['params']
    # tx = optax.adam(learning_rate=FLAGS.model['lr'], b1=FLAGS.model['beta1'], b2=FLAGS.model['beta2'])
    discriminator_ts = TrainState.create(discriminator_def, discriminator_params)#, tx=tx)#No tx again
    print("Total num of Discriminator parameters:", sum(x.size for x in jax.tree_util.tree_leaves(discriminator_params)))

    model = VQGANModel(rng=rng, vqvae=vqvae_ts, vqvae_eps=vqvae_eps_ts, discriminator=discriminator_ts, config=FLAGS.model)

    # Evaluation requires trained weights; refuse to run from scratch.
    assert FLAGS.load_dir is not None
    cp = Checkpoint(FLAGS.load_dir)
    model = cp.load_model(model)
    print("Loaded model with step", model.vqvae.step)

    model = flax.jax_utils.replicate(model, devices=jax.local_devices())
    jax.debug.visualize_array_sharding(model.vqvae.params['decoder']['Conv_0']['bias'])
    #print(model.vqvae)



    ####################################
    # Noise stuff
    ###################################

    cpus = jax.devices("cpu")

    i = 0
    lpips_list = []          # PPL (one-sided walk) LPIPS scores per batch
    lpips_list_ppl_two = []  # PPL (two-sided walk) LPIPS scores per batch
    means = []               # per-channel latent means per batch
    stds = []                # per-channel latent stds per batch

    noisy_means = []
    noisy_stds = []

    predicted_stds = []

    noisy_predicted_stds = []

    latent_list = []
    #TODO
    #equivariance loss, DCT stats, psnr, ssim
    #Instead of isometry, we want... RELATIVEMTRY
    #Gini coefficient
    #denstity cv
    #normalized entropy
    #"uniformity" - basically related to the covariance loss? How spread out the pionts are

    #relativemtry basically says:
    #Given the function F, that turn x into x'
    #For all possible x, y within X, |x - y| = C [x' - y'|
    #Is this a desirable property though?
    #Who cares, let's calculate it anyway

    #
    #Need to try out our own f16c16, which is the same compression as f8c4
    #We will try
    #1,1,2,2,4
    #1,2,2,4,4
    #1,2,4,8,8
    #1,2,4,4,4



    for valid_images in dataset_valid:


        valid_images = valid_images.reshape((len(jax.local_devices()), -1, *valid_images.shape[1:])) # [devices, batch//devices, etc..]
        #1, 2, 256, 256, 3


        #Regular PPL
        reconstructed_images, decoded, std, latents = model.reconstruction_ppl(valid_images) # [devices, 8, 256, 256, 3]
        #Leaves channel dim out
        # NOTE(review): the model-returned `std` is immediately overwritten
        # here, so `predicted_stds` below actually records the empirical
        # latent std, not the model's predicted noise std — confirm intent.
        mean = jnp.mean(latents, axis = [0,1,2,3])
        std = jnp.std(latents, axis = [0,1,2,3])

        #TODO maybe need to put this onto CPU
        latent_list.append(latents)



        means.append(mean)
        stds.append(std)

        predicted_stds.append(std)

        lpips_list.append(operations(reconstructed_images, decoded))


        #PPL two, walk both directions
        reconstructed_images, decoded, std, latents, decoded_2 = model.reconstruction_ppl_two(valid_images) # [devices, 8, 256, 256, 3]
        #For this one we don't care about reconstructed images, only decoded and decoded 2

        lpips_list_ppl_two.append(operations(decoded, decoded_2))




        #Ppl but images.
        reconstructed_images, decoded, std, latents, std_noisy, latents_noisy = model.reconstruction_ppl_image(valid_images) # [devices, 8, 256, 256, 3]
        noisy_means.append(latents_noisy.mean(axis = [0,1,2,3]))
        noisy_stds.append(latents_noisy.std(axis = [0,1,2,3]))
        noisy_predicted_stds.append(std_noisy)

        #TODO WHAT IS THE LOSS FUNCTION FOR THIS ONE
        #it's not quite perplexity, but there's two components
        #one is that we check lpips difference as a function of final image
        #The other is that we look at how far away the latents are, and see if that is consistent.

        i += 1
        # Evaluate at most 500 batches of the validation split.
        if i == 500:
            break

    #Should be just 4 here, so... 0?

    mean_lpips = jnp.mean(jnp.asarray(lpips_list))

    #So our lpips list or whatever is like. Maybe we want per channel?
    std_lpips = jnp.std(jnp.asarray(lpips_list))
    print("PPL Regular", mean_lpips)
    print("C std", std_lpips)

    #So here we have 500/50,000 x 4.
    #We can mean, get the mean per channel.
    #We can get the std per channel.

    print("mean of means", jnp.asarray(means).mean(axis = [0]))
    print("stds of means", jnp.asarray(means).std(axis = [0]))


    print("mean of stds", jnp.asarray(stds).mean(axis = [0]))
    print("std of stds", jnp.asarray(stds).std(axis = [0]))



    mean_lpips = jnp.mean(jnp.asarray(lpips_list_ppl_two))
    std_lpips = jnp.std(jnp.asarray(lpips_list_ppl_two))

    print("PPL Two", mean_lpips)
    print("C std Two", std_lpips)

    print("noisy mean of means", jnp.asarray(noisy_means).mean(axis = [0]))
    print("noisy stds of means", jnp.asarray(noisy_means).std(axis = [0]))
    print("noisy mean of stds", jnp.asarray(noisy_stds).mean(axis = [0]))
    print("noisy std of stds", jnp.asarray(noisy_stds).std(axis = [0]))

    print("Average noise added to image", jnp.asarray(predicted_stds).mean(axis = [0]))
    print("Average noise added to image std", jnp.asarray(predicted_stds).std(axis = [0]))

    print("Average noise added to noisy image", jnp.asarray(noisy_predicted_stds).mean(axis = [0, 1, 2, 3, 4]))
    print("Average noise added to noisy image std", jnp.asarray(noisy_predicted_stds).std(axis = [0, 1, 2, 3, 4]))

    print("Effective new variance (sqrt it)", jnp.asarray(noisy_predicted_stds).std(axis = [0,1,2,3,4]) ** 2 + jnp.asarray(stds).mean(axis = [0]) ** 2)


    #Intrinsic dimension estimate of the latent set (dadapy).
    latent_list = np.asarray(latent_list).squeeze()
    print(latent_list.shape)#Should be like, 500,2,32,32,4
    # NOTE(review): hard-coded 32x32x4 latent shape — only valid for this
    # particular model config; confirm before reusing.
    latent_list = latent_list.reshape(-1,32,32,4)
    latent_list = latent_list.reshape(latent_list.shape[0], -1)
    latent_list = Data(latent_list)
    latent_list.compute_distances(maxk=100)

    # compute the intrinsic dimension using 2nn estimator
    id, id_error, id_distance = latent_list.compute_id_2NN()
    print(id, id_error, id_distance)

    # Recorded results from previous runs follow (kept for reference).
    #None of these stats take anything else into account.
    #No normalization, nothing
    """PL 100
    PPL Regular 6.3766294
    C std 0.9229477
    mean of means 0.16227543
    stds of means 0.53616405
    mean of stds 4.4914503
    std of stds 0.6015057
    PPL Two 6.3642726
    C std Two 0.92391133
    """


    """1e-4
    PPL Regular 12.521122
    C std 2.3125298
    mean of means 0.0065882676
    stds of means 0.042861093
    mean of stds 0.7608507
    std of stds 0.05846726
    PPL Two 12.581134
    C std Two 2.5102239
    Average noise added to image 0.5992337
    Average noise added to image std 0.25218853
    """


    """1e-5
    PPL Regular 13.183324
    C std 2.9292953
    mean of means 0.0065166513
    stds of means 0.06983645
    mean of stds 0.9855982
    std of stds 0.05810356
    PPL Two 13.193566
    C std Two 2.9465785
    Average noise added to image 0.16906397
    Average noise added to image std 0.12756345
    """

    """1e-6
    PPL Regular 14.146276
    C std 3.6374733
    mean of means -0.018107202
    stds of means 0.11694455
    mean of stds 1.0860059
    std of stds 0.09732369
    PPL Two 14.116948
    C std Two 3.547216
    Average noise added to image 0.039256155
    Average noise added to image std 0.026851926
    """

    """AE
    PPL Regular 10.103417
    C std 2.2966182
    mean of means 0.35234922
    stds of means 0.4036692
    mean of stds 2.6363409
    std of stds 0.30666474
    PPL Two 10.075436
    C std Two 2.2949345
    No noise added to image
    """

    """Dino 1e-5
    PPL Regular 2.373527
    C std 0.45295972
    mean of means 2.5987418
    stds of means 3.097953
    mean of stds 49.437305
    std of stds 2.5111952
    PPL Two 2.3797483
    C std Two 0.49930122
    noisy mean of means 2.598704
    noisy stds of means 3.0979395
    noisy mean of stds 49.437298
    noisy std of stds 2.5112264

    """

    #58.344119061134336 0.0 57.78905382129868
| 428 |
+
# Entry point: absl parses the flags (including those inherited from
# train.py) and invokes main.
if __name__ == '__main__':
    app.run(main)
|
f16c16/decode_only.py
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
try:  # For debugging
    # Optional developer-machine hook; localutils is not a project dependency.
    from localutils.debugger import enable_debug
    enable_debug()
except ImportError:
    # Run normally when the local debugging helper is absent.
    pass
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
#import jax
|
| 9 |
+
#jax.config.update('jax_platform_name', 'cpu')
|
| 10 |
+
import os
|
| 11 |
+
import jax
|
| 12 |
+
|
| 13 |
+
import flax.linen as nn
|
| 14 |
+
import jax.numpy as jnp
|
| 15 |
+
from absl import app, flags
|
| 16 |
+
from functools import partial
|
| 17 |
+
import numpy as np
|
| 18 |
+
import tqdm
|
| 19 |
+
import flax
|
| 20 |
+
import optax
|
| 21 |
+
import wandb
|
| 22 |
+
from ml_collections import config_flags
|
| 23 |
+
#import elements
|
| 24 |
+
import ml_collections
|
| 25 |
+
import tensorflow_datasets as tfds
|
| 26 |
+
import tensorflow as tf
|
| 27 |
+
tf.config.set_visible_devices([], "GPU")
|
| 28 |
+
tf.config.set_visible_devices([], "TPU")
|
| 29 |
+
import matplotlib.pyplot as plt
|
| 30 |
+
from typing import Any
|
| 31 |
+
|
| 32 |
+
from utils.train_state import TrainState, target_update
|
| 33 |
+
from utils.checkpoint import Checkpoint
|
| 34 |
+
from utils.fid import get_fid_network, fid_from_stats
|
| 35 |
+
|
| 36 |
+
from train import VQGANModel
|
| 37 |
+
from models.vqvae import VQVAE
|
| 38 |
+
from models.discriminator import Discriminator
|
| 39 |
+
|
| 40 |
+
from PIL import Image
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
# train.py (imported below for VQGANModel) already defines these flags, so
# remove them before re-defining with decode-specific defaults.
delattr(flags.FLAGS, 'dataset_name')
delattr(flags.FLAGS, 'load_dir')
delattr(flags.FLAGS, 'batch_size')

FLAGS = flags.FLAGS
flags.DEFINE_string('dataset_name', 'imagenet256', 'Environment name.')
flags.DEFINE_string('load_dir', "/home/dkaplan/Documents/LiClipse Workspace/VAE/jax-vqvae-vqgan/7e-5_sdlike_sym/checkpoint.tmp", 'Load dir (if not None, load params from here).')
flags.DEFINE_integer('batch_size', 2, 'Total Batch size.')
# Flags are inherited from train.py, so pass your model parameters again here to evaluate.
|
| 52 |
+
|
| 53 |
+
def main(_):
    """Build the VQGAN model, load checkpoint weights, and return the model
    replicated across local devices (decode-only helper: no dataset is
    loaded; parameter shapes come from the model config).

    Returns:
        The replicated VQGANModel with parameters restored from
        FLAGS.load_dir.
    """
    device_count = len(jax.local_devices())
    global_device_count = jax.device_count()
    # Kept for parity with train.py / all_stats.py, though unused here.
    local_batch_size = FLAGS.batch_size // (global_device_count // device_count)

    rng = jax.random.PRNGKey(FLAGS.seed)
    rng, param_key = jax.random.split(rng)
    print("Total devices", jax.local_devices()[0])

    ###################################
    # Creating Model and put on devices.
    ###################################

    # BUG FIX: `example_obs` was referenced below but never defined in this
    # file, so main() raised NameError immediately.  Build a dummy
    # observation from the model config (falling back to 256x256 RGB, which
    # matches the 'imagenet256' default) just to initialize parameter shapes.
    image_size = int(getattr(FLAGS.model, 'image_size', 256))
    image_channels = int(getattr(FLAGS.model, 'image_channels', 3))
    example_obs = jnp.zeros((1, image_size, image_size, image_channels), dtype=jnp.float32)

    vqvae_def = VQVAE(FLAGS.model, train=True)
    vqvae_params = vqvae_def.init({'params': param_key, 'noise': param_key}, example_obs)['params']
    tx = optax.adam(learning_rate=FLAGS.model['lr'], b1=FLAGS.model['beta1'], b2=FLAGS.model['beta2'])
    vqvae_ts = TrainState.create(vqvae_def, vqvae_params, tx=tx)
    vqvae_def_eps = VQVAE(FLAGS.model, train=False)
    vqvae_eps_ts = TrainState.create(vqvae_def_eps, vqvae_params)
    print("Total num of VQVAE parameters:", sum(x.size for x in jax.tree_util.tree_leaves(vqvae_params)))

    discriminator_def = Discriminator(FLAGS.model)
    discriminator_params = discriminator_def.init(param_key, example_obs)['params']
    tx = optax.adam(learning_rate=FLAGS.model['lr'], b1=FLAGS.model['beta1'], b2=FLAGS.model['beta2'])
    discriminator_ts = TrainState.create(discriminator_def, discriminator_params, tx=tx)
    print("Total num of Discriminator parameters:", sum(x.size for x in jax.tree_util.tree_leaves(discriminator_params)))

    model = VQGANModel(rng=rng, vqvae=vqvae_ts, vqvae_eps=vqvae_eps_ts, discriminator=discriminator_ts, config=FLAGS.model)

    # Decoding requires trained weights; refuse to run from scratch.
    assert FLAGS.load_dir is not None
    cp = Checkpoint(FLAGS.load_dir)
    model = cp.load_model(model)
    print("Loaded model with step", model.vqvae.step)

    model = flax.jax_utils.replicate(model, devices=jax.local_devices())
    jax.debug.visualize_array_sharding(model.vqvae.params['decoder']['Conv_0']['bias'])

    return model
|
| 93 |
+
|
| 94 |
+
#Stuff and things.
|
| 95 |
+
# image2 = valid_reconstructed_images[0,0,:,:,:]
|
| 96 |
+
# image2 = (image2 * 255).astype(np.uint8)
|
| 97 |
+
# image2 = np.array(image2)
|
| 98 |
+
# image2 = Image.fromarray(image2)
|
| 99 |
+
# image2.save("recon" + str(i) + ".png")
|
| 100 |
+
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
# images.append((valid_reconstructed_images*255).astype(np.uint8))
|
| 106 |
+
|
| 107 |
+
# Entry point: absl parses the flags (including those inherited from
# train.py) and invokes main.
if __name__ == '__main__':
    app.run(main)
|
f16c16/encode_latents.py
ADDED
|
@@ -0,0 +1,338 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
try:  # For debugging
    # Optional developer-machine hook; localutils is not a project dependency.
    from localutils.debugger import enable_debug
    enable_debug()
except ImportError:
    # Run normally when the local debugging helper is absent.
    pass
|
| 6 |
+
|
| 7 |
+
#GPU, batch 16, latent:
|
| 8 |
+
"""[[[[-9.51360688e-02 -6.00612536e-02 -6.76547512e-02 -3.73330832e-01]
|
| 9 |
+
[-3.10049266e-01 -6.82027787e-02 1.09544434e-01 -1.51526511e-01]
|
| 10 |
+
[-1.63606599e-01 1.52324408e-01 1.03230253e-01 -3.34064662e-01]
|
| 11 |
+
...
|
| 12 |
+
[-9.08230543e-02 2.53294855e-01 6.09488077e-02 -3.55355501e-01]
|
| 13 |
+
[-2.16098756e-01 -3.44716787e-01 5.68981618e-02 -1.19108176e+00]
|
| 14 |
+
[ 9.24487635e-02 2.20324457e-01 1.84478119e-01 4.46850598e-01]]
|
| 15 |
+
|
| 16 |
+
[[-1.60119295e-01 2.00234763e-02 -1.43943653e-01 -2.22745568e-01]
|
| 17 |
+
[-2.55345762e-01 1.55626327e-01 4.85354941e-03 -1.33636221e-01]
|
| 18 |
+
[-1.64813206e-01 1.63652197e-01 -6.96032941e-02 -3.96138221e-01]
|
| 19 |
+
...
|
| 20 |
+
[-1.74221992e-01 2.78679162e-01 -1.02342315e-01 -4.71356630e-01]
|
| 21 |
+
[-9.72934887e-02 2.24700689e-01 -1.54692575e-01 -8.07371676e-01]
|
| 22 |
+
[ 1.58384442e-02 9.63119492e-02 4.84653771e-01 8.73409092e-01]]
|
| 23 |
+
|
| 24 |
+
[[-1.16939977e-01 2.56956398e-01 -1.04373530e-01 -1.33346528e-01]
|
| 25 |
+
[-1.52860105e-01 1.76005200e-01 -1.16914781e-02 -1.92210004e-01]
|
| 26 |
+
[-5.50103635e-02 2.04600886e-01 -1.73305750e-01 -4.94984031e-01]
|
| 27 |
+
...
|
| 28 |
+
[-3.88413459e-01 3.15461606e-01 -1.25539899e-01 -5.62439263e-01]
|
| 29 |
+
[-1.97147772e-01 -2.31708195e-02 -1.44041494e-01 -8.99005592e-01]
|
| 30 |
+
[ 3.42922032e-01 2.24075779e-01 4.25257713e-01 5.85853398e-01]]
|
| 31 |
+
"""
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
#CPU, batch 16, latent
|
| 35 |
+
|
| 36 |
+
"""
|
| 37 |
+
[[[[-8.47917721e-02 -8.92071351e-02 -1.05532585e-02 -3.59174877e-01]
|
| 38 |
+
[-1.11725748e-01 -1.22415572e-01 3.33435684e-02 -3.60438257e-01]
|
| 39 |
+
[-1.36060238e-01 -1.37327328e-01 3.79590057e-02 -3.73947173e-01]
|
| 40 |
+
...
|
| 41 |
+
[ 7.88694695e-02 -5.03079742e-02 6.75498620e-02 -3.39441150e-01]
|
| 42 |
+
[-1.63178548e-01 -3.21848512e-01 1.72039792e-02 -9.50528085e-01]
|
| 43 |
+
[ 2.21429523e-02 1.48582339e-01 1.54685006e-01 6.86266243e-01]]
|
| 44 |
+
|
| 45 |
+
[[-1.69139117e-01 7.81316869e-03 4.33448888e-02 -3.37453634e-01]
|
| 46 |
+
[-1.96011692e-01 -4.98509258e-02 3.32896858e-02 -3.53303224e-01]
|
| 47 |
+
[-9.82111022e-02 -1.94629002e-02 -1.63653865e-02 -3.32124978e-01]
|
| 48 |
+
...
|
| 49 |
+
[-7.72062615e-02 2.95878220e-02 -7.62912910e-03 -3.61496925e-01]
|
| 50 |
+
[-2.26189673e-01 -5.97889721e-02 -1.16483821e-02 -7.82557964e-01]
|
| 51 |
+
[-6.18810430e-02 7.75512159e-02 2.37205133e-01 8.39313030e-01]]
|
| 52 |
+
|
| 53 |
+
[[-9.37198251e-02 -4.58365604e-02 -2.44572274e-02 -3.00568134e-01]
|
| 54 |
+
[-1.32911175e-01 -9.60890502e-02 -4.78822738e-04 -3.28105956e-01]
|
| 55 |
+
[-7.67295957e-02 -6.57245517e-02 -3.78448963e-02 -3.29079330e-01]
|
| 56 |
+
...
|
| 57 |
+
[-1.21173687e-01 4.07976359e-02 4.05129045e-02 -3.48512828e-01]
|
| 58 |
+
[-1.64501339e-01 -9.52737629e-02 -1.06653105e-03 -8.39630961e-01]
|
| 59 |
+
[ 2.64041096e-01 2.43525319e-02 3.05205405e-01 4.92310941e-01]]
|
| 60 |
+
"""
|
| 61 |
+
|
| 62 |
+
#CPU, 8 vs GPU 8
|
| 63 |
+
"""
|
| 64 |
+
[[[[[-3.18646997e-01 -4.77920741e-01 1.07763827e+00 1.70530510e+00]
|
| 65 |
+
[-6.31720126e-01 -2.49106735e-01 1.66874206e+00 -5.45821428e-01]
|
| 66 |
+
[-4.03593808e-01 2.76418477e-01 1.29216135e+00 8.79887521e-01]
|
| 67 |
+
...
|
| 68 |
+
[-2.03093603e-01 -7.97204554e-01 3.61778885e-01 -3.68656218e-01]
|
| 69 |
+
[-2.61139393e-01 1.64036989e+00 -2.22024798e-01 3.49313989e-02]
|
| 70 |
+
[ 6.32668972e-01 -4.74448204e-01 1.55093277e+00 5.57837903e-01]]
|
| 71 |
+
|
| 72 |
+
[[-7.24952042e-01 4.80744302e-01 3.05105478e-01 1.06132841e+00]
|
| 73 |
+
[ 8.95307362e-02 1.45687327e-01 1.57945228e+00 -1.11452961e+00]
|
| 74 |
+
[-4.61988777e-01 -4.11880344e-01 1.70428991e+00 4.31171536e-01]
|
| 75 |
+
...
|
| 76 |
+
[-1.17851949e+00 2.03509808e-01 1.84925032e+00 -5.68852723e-01]
|
| 77 |
+
[ 5.74628949e-01 -8.48990500e-01 -2.50778824e-01 1.92248678e+00]
|
| 78 |
+
[-2.69778688e-02 -8.46022546e-01 -7.89667487e-01 9.26319182e-01]]
|
| 79 |
+
|
| 80 |
+
[[-3.10738117e-01 6.01165593e-02 1.57032907e-01 1.53192639e+00]
|
| 81 |
+
[ 6.55903339e-01 7.50707746e-01 6.03949744e-03 1.31769347e+00]
|
| 82 |
+
[ 3.26834202e-01 -2.33611539e-01 1.35725603e-01 -2.39371091e-01]
|
| 83 |
+
...
|
| 84 |
+
[ 2.19290599e-01 -2.21653271e+00 -2.21055865e+00 1.49363160e+00]
|
| 85 |
+
[-1.45460200e+00 1.18737824e-01 1.56015289e+00 8.23014230e-03]
|
| 86 |
+
[ 3.44308168e-01 1.08958745e+00 -1.23330317e-01 5.41093886e-01]]
|
| 87 |
+
|
| 88 |
+
#GPU
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
"""
|
| 92 |
+
|
| 93 |
+
# import jax
# jax.config.update('jax_platform_name', 'cpu')
import os

# os.environ["JAX_PLATFORMS"] = 'cpu'

import jax
import lpips

# LPIPS perceptual metric (AlexNet backbone, per the lpips README "best forward scores").
loss_fn_alex = lpips.LPIPS(net='alex') # best forward scores
loss_fn_alex = loss_fn_alex.cuda()


import numpy as np
import flax.linen as nn
import jax.numpy as jnp
from absl import app, flags
from functools import partial
import numpy as np
import tqdm
import flax
import optax
import wandb
from ml_collections import config_flags
#import elements
import ml_collections
import tensorflow_datasets as tfds
import tensorflow as tf
# Keep TensorFlow off the accelerators so it does not grab memory that JAX needs.
tf.config.set_visible_devices([], "GPU")
tf.config.set_visible_devices([], "TPU")
import matplotlib.pyplot as plt
from typing import Any

from utils.train_state import TrainState, target_update
from utils.checkpoint import Checkpoint
from utils.fid import get_fid_network, fid_from_stats

from train import VQGANModel
from models.vqvae import VQVAE
from models.discriminator import Discriminator

from PIL import Image
import torch

# Importing train.py registered its flags; drop the ones we redefine below.
delattr(flags.FLAGS, 'dataset_name')
delattr(flags.FLAGS, 'load_dir')
delattr(flags.FLAGS, 'batch_size')

FLAGS = flags.FLAGS
flags.DEFINE_string('dataset_name', 'imagenet256', 'Environment name.')
flags.DEFINE_string('load_dir', "/home/dkaplan/Downloads/Models/checkpoint(1).tmp", 'Load dir (if not None, load params from here).')

from safetensors.torch import save_file

flags.DEFINE_integer('batch_size', 8, 'Total Batch size.')
# Flags are inherited from train.py, so pass your model parameters again here to evaluate.

import gc
def main(_):
    """Encode ImageNet train images (and their horizontal flips) into VQGAN
    latents and write them out as sharded .safetensors files.

    Runs until the train iterator is exhausted; every 5000 accumulated batches
    one shard is flushed to /data/inet_latents.
    """
    device_count = len(jax.local_devices())
    global_device_count = jax.device_count()
    # Per-process batch size (equals FLAGS.batch_size on a single host).
    local_batch_size = FLAGS.batch_size // (global_device_count // device_count)

    def get_dataset(is_train):
        # Train iterator yields (image, flipped_image, label);
        # validation yields (image, label). Images are float32 in [0, 1].
        if 'imagenet' in FLAGS.dataset_name:
            def deserialization_fn(data):
                image = data['image']
                label = data["label"]
                # Center-crop to a square, then resize to the target resolution.
                min_side = tf.minimum(tf.shape(image)[0], tf.shape(image)[1])
                image = tf.image.resize_with_crop_or_pad(image, min_side, min_side)
                if 'imagenet256' in FLAGS.dataset_name:
                    image = tf.image.resize(image, (256, 256))
                elif 'imagenet128' in FLAGS.dataset_name:
                    image = tf.image.resize(image, (128, 128))
                else:
                    raise ValueError(f"Unknown dataset {FLAGS.dataset_name}")
                if is_train:
                    # Deterministic flip (not random augmentation): both
                    # orientations are encoded and saved side by side.
                    image_flip =tf.image.flip_left_right(image)
                    image_flip = tf.cast(image_flip, tf.float32) / 255.0
                    image = tf.cast(image, tf.float32) / 255.0
                    return image, image_flip, label

                image = tf.cast(image, tf.float32) / 255.0
                return image, label

            split = tfds.split_for_jax_process('train' if is_train else 'validation', drop_remainder=True)
            dataset = tfds.load('imagenet2012', data_dir="/data/inet", split=split)
            dataset = dataset.map(deserialization_fn, num_parallel_calls=tf.data.AUTOTUNE)
            dataset = dataset.shuffle(10000, seed=42, reshuffle_each_iteration=True)
            dataset = dataset.batch(local_batch_size)
            dataset = dataset.prefetch(tf.data.AUTOTUNE)
            dataset = tfds.as_numpy(dataset)
            dataset = iter(dataset)
            return dataset
        else:
            raise ValueError(f"Unknown dataset {FLAGS.dataset_name}")

    dataset = get_dataset(is_train=True)
    dataset_valid = get_dataset(is_train=False)

    # First image of the first train batch, used only to infer model input shape.
    example_obs = next(dataset)[:1][0]

    rng = jax.random.PRNGKey(FLAGS.seed)
    rng, param_key = jax.random.split(rng)
    print("Total devices", jax.local_devices()[0])

    ###################################
    # Creating Model and put on devices.
    ###################################
    FLAGS.model.image_channels = example_obs.shape[-1]
    FLAGS.model.image_size = example_obs.shape[1]
    vqvae_def = VQVAE(FLAGS.model, train=True)
    vqvae_params = vqvae_def.init({'params': param_key, 'noise': param_key}, example_obs)['params']
    # tx = optax.adam(learning_rate=FLAGS.model['lr'], b1=FLAGS.model['beta1'], b2=FLAGS.model['beta2'])
    # No optimizer: this script only runs inference, so tx is deliberately omitted.
    vqvae_ts = TrainState.create(vqvae_def, vqvae_params)#, tx=tx) #Turning off tx because we don't need it...
    vqvae_def_eps = VQVAE(FLAGS.model, train=False)
    vqvae_eps_ts = TrainState.create(vqvae_def_eps, vqvae_params)
    print("Total num of VQVAE parameters:", sum(x.size for x in jax.tree_util.tree_leaves(vqvae_params)))

    discriminator_def = Discriminator(FLAGS.model)
    discriminator_params = discriminator_def.init(param_key, example_obs)['params']
    # tx = optax.adam(learning_rate=FLAGS.model['lr'], b1=FLAGS.model['beta1'], b2=FLAGS.model['beta2'])
    discriminator_ts = TrainState.create(discriminator_def, discriminator_params)#, tx=tx)#No tx again
    print("Total num of Discriminator parameters:", sum(x.size for x in jax.tree_util.tree_leaves(discriminator_params)))

    model = VQGANModel(rng=rng, vqvae=vqvae_ts, vqvae_eps=vqvae_eps_ts, discriminator=discriminator_ts, config=FLAGS.model)

    assert FLAGS.load_dir is not None
    cp = Checkpoint(FLAGS.load_dir)
    model = cp.load_model(model)
    print("Loaded model with step", model.vqvae.step)

    # Replicate parameters across local devices for data-parallel encoding.
    model = flax.jax_utils.replicate(model, devices=jax.local_devices())
    jax.debug.visualize_array_sharding(model.vqvae.params['decoder']['Conv_0']['bias'])

    latents = []
    latents_flip = []
    labels = []
    saved_files = 0
    for image, image_flip, label in dataset:
        # Shard the batch across local devices: [devices, batch//devices, ...].
        image = image.reshape((len(jax.local_devices()), -1, *image.shape[1:])) # [devices, batch//devices, etc..]
        latent, result_dict = model.get_latent(image)

        # Same for the horizontally-flipped copy of the batch.
        image_flip = image_flip.reshape((len(jax.local_devices()), -1, *image_flip.shape[1:])) # [devices, batch//devices, etc..]
        latent_flip, result_dict_flip = model.get_latent(image_flip)

        latents.append(latent.squeeze())
        latents_flip.append(latent_flip.squeeze())
        labels.append(label)

        # Flush one shard to disk every 5000 accumulated batches.
        if len(latents) == 5000:
            latents = jnp.concatenate(latents, axis=0)
            latents_flip = jnp.concatenate(latents_flip, axis=0)
            labels = jnp.concatenate(labels, axis=0)

            # JAX -> numpy -> torch; np.copy gives torch a writable buffer.
            latents_torch = np.asarray(latents)
            latents_torch = torch.from_numpy(np.copy(latents_torch))

            latents_flip_torch = np.asarray(latents_flip)
            latents_flip_torch = torch.from_numpy(np.copy(latents_flip_torch))

            labels_torch = np.asarray(labels)
            labels_torch = torch.from_numpy(np.copy(labels_torch))

            save_dict = {
                'latents': latents_torch,
                'latents_flip': latents_flip_torch,
                'labels': labels_torch
            }

            print(latents_torch.shape)
            print(latents_flip_torch.shape)
            print(labels_torch.shape)

            output_dir = "/data/inet_latents"
            save_filename = os.path.join(output_dir, f'latents_shard{saved_files:03d}.safetensors')
            save_file(
                save_dict,
                save_filename,
                metadata={'total_size': f'{latents_torch.shape[0]}', 'dtype': f'{latents_torch.dtype}', 'device': f'{latents_torch.device}'}
            )

            # Reset accumulators for the next shard.
            latents = []
            latents_flip = []
            labels = []
            saved_files += 1
# Script entry point: absl parses the flags inherited from train.py plus the
# ones redefined above, then calls main().
if __name__ == '__main__':
    app.run(main)
|
f16c16/eval_fid.py
ADDED
|
@@ -0,0 +1,214 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
try: # For debugging
    from localutils.debugger import enable_debug
    enable_debug()
except ImportError:
    pass

import flax.linen as nn
import jax.numpy as jnp
from absl import app, flags
from functools import partial
import numpy as np
import tqdm
import jax
import jax.numpy as jnp
import flax
import optax
import wandb
from ml_collections import config_flags
#import elements
import ml_collections
import tensorflow_datasets as tfds
import tensorflow as tf
# Keep TensorFlow off the accelerators so it does not grab memory that JAX needs.
tf.config.set_visible_devices([], "GPU")
tf.config.set_visible_devices([], "TPU")
import matplotlib.pyplot as plt
from typing import Any

from utils.train_state import TrainState, target_update
from utils.checkpoint import Checkpoint
from utils.fid import get_fid_network, fid_from_stats

from train import VQGANModel
from models.vqvae import VQVAE
from models.discriminator import Discriminator

# Importing train.py registered its flags; drop the ones we redefine below.
delattr(flags.FLAGS, 'dataset_name')
delattr(flags.FLAGS, 'load_dir')
delattr(flags.FLAGS, 'batch_size')

FLAGS = flags.FLAGS
flags.DEFINE_string('dataset_name', 'imagenet256', 'Environment name.')
flags.DEFINE_string('load_dir', "./checkpointbest.tmp.tmp", 'Load dir (if not None, load params from here).')
flags.DEFINE_integer('batch_size', 128, 'Total Batch size.')
# Flags are inherited from train.py, so pass your model parameters again here to evaluate.
def main(_):
    """Reconstruct ImageNet validation images through a loaded VQGAN and
    report reconstruction FID against precomputed reference statistics.

    Also dumps the uint8 reconstructions to ./images_recon.npz.
    """
    device_count = len(jax.local_devices())
    global_device_count = jax.device_count()
    # Per-process batch size (equals FLAGS.batch_size on a single host).
    local_batch_size = FLAGS.batch_size // (global_device_count // device_count)

    def get_dataset(is_train):
        # Iterator of [local_batch_size, H, W, 3] float32 images in [0, 1].
        if 'imagenet' in FLAGS.dataset_name:
            def deserialization_fn(data):
                image = data['image']
                # Center-crop to a square, then resize to the target resolution.
                min_side = tf.minimum(tf.shape(image)[0], tf.shape(image)[1])
                image = tf.image.resize_with_crop_or_pad(image, min_side, min_side)
                if 'imagenet256' in FLAGS.dataset_name:
                    image = tf.image.resize(image, (256, 256))
                elif 'imagenet128' in FLAGS.dataset_name:
                    image = tf.image.resize(image, (128, 128))
                else:
                    raise ValueError(f"Unknown dataset {FLAGS.dataset_name}")
                if is_train:
                    image = tf.image.random_flip_left_right(image)
                image = tf.cast(image, tf.float32) / 255.0
                return image

            split = tfds.split_for_jax_process('train' if is_train else 'validation', drop_remainder=True)
            dataset = tfds.load('imagenet2012', data_dir="/dev/shm", split=split)
            dataset = dataset.map(deserialization_fn, num_parallel_calls=tf.data.AUTOTUNE)
            dataset = dataset.shuffle(10000, seed=42, reshuffle_each_iteration=True)
            dataset = dataset.batch(local_batch_size)
            dataset = dataset.prefetch(tf.data.AUTOTUNE)
            dataset = tfds.as_numpy(dataset)
            dataset = iter(dataset)
            return dataset
        else:
            raise ValueError(f"Unknown dataset {FLAGS.dataset_name}")

    # NOTE(review): both iterators are built from the validation split here.
    dataset = get_dataset(is_train=False)
    dataset_valid = get_dataset(is_train=False)
    example_obs = next(dataset)[:1]

    get_fid_activations = get_fid_network()
    # Precomputed reference (ground-truth) FID statistics for ImageNet-256.
    truth_fid_stats = np.load('data/imagenet256_fidstats_openai.npz')
    # truth_fid_stats = np.load('base_stats.npz')

    rng = jax.random.PRNGKey(FLAGS.seed)
    rng, param_key = jax.random.split(rng)
    print("Total Memory on device:", float(jax.local_devices()[0].memory_stats()['bytes_limit']) / 1024**3, "GB")

    ###################################
    # Creating Model and put on devices.
    ###################################
    FLAGS.model.image_channels = example_obs.shape[-1]
    FLAGS.model.image_size = example_obs.shape[1]
    vqvae_def = VQVAE(FLAGS.model, train=True)
    vqvae_params = vqvae_def.init({'params': param_key, 'noise': param_key}, example_obs)['params']
    tx = optax.adam(learning_rate=FLAGS.model['lr'], b1=FLAGS.model['beta1'], b2=FLAGS.model['beta2'])
    vqvae_ts = TrainState.create(vqvae_def, vqvae_params, tx=tx)
    vqvae_def_eps = VQVAE(FLAGS.model, train=False)
    vqvae_eps_ts = TrainState.create(vqvae_def_eps, vqvae_params)
    print("Total num of VQVAE parameters:", sum(x.size for x in jax.tree_util.tree_leaves(vqvae_params)))

    discriminator_def = Discriminator(FLAGS.model)
    discriminator_params = discriminator_def.init(param_key, example_obs)['params']
    tx = optax.adam(learning_rate=FLAGS.model['lr'], b1=FLAGS.model['beta1'], b2=FLAGS.model['beta2'])
    discriminator_ts = TrainState.create(discriminator_def, discriminator_params, tx=tx)
    print("Total num of Discriminator parameters:", sum(x.size for x in jax.tree_util.tree_leaves(discriminator_params)))

    model = VQGANModel(rng=rng, vqvae=vqvae_ts, vqvae_eps=vqvae_eps_ts, discriminator=discriminator_ts, config=FLAGS.model)

    assert FLAGS.load_dir is not None
    cp = Checkpoint(FLAGS.load_dir)
    model = cp.load_model(model)
    print("Loaded model with step", model.vqvae.step)

    # Replicate parameters across local devices for data-parallel reconstruction.
    model = flax.jax_utils.replicate(model, devices=jax.local_devices())
    jax.debug.visualize_array_sharding(model.vqvae.params['decoder']['Conv_0']['bias'])
    #print(model.vqvae)

    ###################################
    # FID Evaluation.
    ###################################

    activations = []
    activations_base = []

    images = []
    images_original = []
    for valid_images in dataset_valid:

        images_original.append((valid_images*255).astype(np.uint8))
        # Pad the last partial batch with zeros so it shards evenly across
        # devices; the padded rows are stripped back out below.
        if valid_images.shape[0] < local_batch_size:
            zeros_added = local_batch_size - valid_images.shape[0]
            valid_images = np.concatenate([valid_images, np.zeros((local_batch_size - valid_images.shape[0], *valid_images.shape[1:]))], axis=0)
        else:
            zeros_added = 0

        print(len(jax.local_devices()))
        print(valid_images.shape)
        valid_images = valid_images.reshape((len(jax.local_devices()), -1, *valid_images.shape[1:])) # [devices, batch//devices, etc..]
        print(valid_images.shape)
        valid_reconstructed_images = model.reconstruction(valid_images) # [devices, 8, 256, 256, 3]
        print(valid_reconstructed_images.shape)

        # Plotting is currently disabled: the loop below `continue`s immediately,
        # so the imshow calls are dead code.
        fig, axs = plt.subplots(2, 8, figsize=(30, 15))

        for j in range(1):
            continue#Turn this off for now
            axs[0, j].imshow(valid_images[j, 0], vmin=0, vmax=1)
            axs[1, j].imshow(valid_reconstructed_images[j, 0], vmin=0, vmax=1)
        #wandb.log({'reconstruction': wandb.Image(fig)}, step=i)

        # Keep uint8 reconstructions (assumes model output is in [0, 1] —
        # TODO confirm; see the 2*x-1 rescale below) for the npz dump.
        add_images = valid_reconstructed_images.reshape(-1,256,256,3)
        if zeros_added > 0:
            add_images = add_images[:-zeros_added, :, :, :]
        images.append((add_images*255).astype(np.uint8))

        #valid = (valid_reconstructed_images + 1 ) * 127.5
        #images2.append(valid.clamp(0,255).astype(npuint8))

        # Resize to the 299x299 Inception input and rescale to [-1, 1]
        # before extracting FID activations.
        valid_reconstructed_images = jax.image.resize(valid_reconstructed_images, (valid_images.shape[0], valid_images.shape[1], 299, 299, 3),
                                                      method='bilinear', antialias=True)
        valid_reconstructed_images = 2 * valid_reconstructed_images - 1
        acts = np.array(get_fid_activations(valid_reconstructed_images))[..., 0, 0, :]
        if zeros_added > 0:
            acts = acts[:-zeros_added]
        activations.append(acts)

        # Used to grab baseline truths (disabled).
        if False:
            valid_reconstructed_images = jax.image.resize(valid_images, (valid_images.shape[0], valid_images.shape[1], 299, 299, 3),
                                                          method='bilinear', antialias=True)
            valid_reconstructed_images = 2 * valid_reconstructed_images - 1
            acts = np.array(get_fid_activations(valid_reconstructed_images))[..., 0, 0, :]

            if zeros_added > 0:
                acts = acts[:-zeros_added]
            activations_base.append(acts)
        # This is fine because it's just length (progress counter).
        print(len(activations) * FLAGS.batch_size)

    images = np.concatenate(images, axis = 0)
    #images_original = np.concatenate(images_original, axis = 0)
    print(images.shape)
    #print(images_original.shape)
    # Collapse to a flat [N, 256, 256, 3] array of reconstructions.
    images = images.reshape(-1, 256, 256, 3)
    #images2 = images_original.reshape(-1,256,256,3)

    activations = np.concatenate(activations, axis=0)
    activations = activations.reshape((-1, activations.shape[-1]))
    mu1 = np.mean(activations, axis=0)
    sigma1 = np.cov(activations, rowvar=False)
    #print(mu1)
    #print(sigma1)
    fid = fid_from_stats(mu1, sigma1, truth_fid_stats['mu'], truth_fid_stats['sigma'])

    print("FID:", fid)

    np.savez("./images_recon.npz", arr_0 = images)
    #np.savez("./images_original.npz", arr_0 = images2)
# Script entry point: absl parses the flags inherited from train.py plus the
# ones redefined above, then calls main().
if __name__ == '__main__':
    app.run(main)
|
f16c16/evaluator.py
ADDED
|
@@ -0,0 +1,654 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import argparse
import io
import os
import random
import warnings
import zipfile
from abc import ABC, abstractmethod
from contextlib import contextmanager
from functools import partial
from multiprocessing import cpu_count
from multiprocessing.pool import ThreadPool
from typing import Iterable, Optional, Tuple

import numpy as np
import requests
import tensorflow.compat.v1 as tf
from scipy import linalg
from tqdm.auto import tqdm

# Frozen Inception-v3 graph used by OpenAI's reference FID/IS evaluator.
INCEPTION_V3_URL = "https://openaipublic.blob.core.windows.net/diffusion/jul-2021/ref_batches/classify_image_graph_def.pb"
INCEPTION_V3_PATH = "classify_image_graph_def.pb"

# Tensor names inside the Inception graph: pooled features (FID) and
# spatial features (sFID).
FID_POOL_NAME = "pool_3:0"
FID_SPATIAL_NAME = "mixed_6/conv:0"
def main():
    """Compare a sample npz batch against a reference npz batch, printing
    Inception Score, FID, sFID, Precision and Recall."""
    parser = argparse.ArgumentParser()
    parser.add_argument("ref_batch", help="path to reference batch npz file")
    parser.add_argument("sample_batch", help="path to sample batch npz file")
    args = parser.parse_args()

    config = tf.ConfigProto(
        allow_soft_placement=True  # allows DecodeJpeg to run on CPU in Inception graph
    )
    config.gpu_options.allow_growth = True
    evaluator = Evaluator(tf.Session(config=config))

    print("warming up TensorFlow...")
    # This will cause TF to print a bunch of verbose stuff now rather
    # than after the next print(), to help prevent confusion.
    evaluator.warmup()

    print("computing reference batch activations...")
    ref_acts = evaluator.read_activations(args.ref_batch)
    print("computing/reading reference batch statistics...")
    ref_stats, ref_stats_spatial = evaluator.read_statistics(args.ref_batch, ref_acts)

    print("computing sample batch activations...")
    sample_acts = evaluator.read_activations(args.sample_batch)
    print("computing/reading sample batch statistics...")
    sample_stats, sample_stats_spatial = evaluator.read_statistics(args.sample_batch, sample_acts)

    print("Computing evaluations...")
    print("Inception Score:", evaluator.compute_inception_score(sample_acts[0]))
    print("FID:", sample_stats.frechet_distance(ref_stats))
    print("sFID:", sample_stats_spatial.frechet_distance(ref_stats_spatial))
    prec, recall = evaluator.compute_prec_recall(ref_acts[0], sample_acts[0])
    print("Precision:", prec)
    print("Recall:", recall)
class InvalidFIDException(Exception):
    """Exception type for FID computation failures (not raised in this chunk)."""
+
|
| 67 |
+
class FIDStatistics:
    """Container for Inception activation statistics: mean vector and covariance."""

    def __init__(self, mu: np.ndarray, sigma: np.ndarray):
        # mu: [N] mean of activations; sigma: [N, N] covariance of activations.
        self.mu = mu
        self.sigma = sigma

    def frechet_distance(self, other, eps=1e-6):
        """
        Compute the Frechet distance between two sets of statistics.
        """
        # Port of the reference implementation:
        # https://github.com/bioinf-jku/TTUR/blob/73ab375cdf952a12686d9aa7978567771084da42/fid.py#L132
        mean_a = np.atleast_1d(self.mu)
        mean_b = np.atleast_1d(other.mu)
        cov_a = np.atleast_2d(self.sigma)
        cov_b = np.atleast_2d(other.sigma)

        assert (
            mean_a.shape == mean_b.shape
        ), f"Training and test mean vectors have different lengths: {mean_a.shape}, {mean_b.shape}"
        assert (
            cov_a.shape == cov_b.shape
        ), f"Training and test covariances have different dimensions: {cov_a.shape}, {cov_b.shape}"

        mean_diff = mean_a - mean_b

        # sqrtm of the covariance product may be numerically singular;
        # retry with a small jitter on the diagonals if so.
        sqrt_prod, _ = linalg.sqrtm(cov_a.dot(cov_b), disp=False)
        if not np.isfinite(sqrt_prod).all():
            warnings.warn(
                "fid calculation produces singular product; adding %s to diagonal of cov estimates"
                % eps
            )
            jitter = np.eye(cov_a.shape[0]) * eps
            sqrt_prod = linalg.sqrtm((cov_a + jitter).dot(cov_b + jitter))

        # Numerical error can leave a tiny imaginary component; strip it,
        # but fail loudly if it is not negligible.
        if np.iscomplexobj(sqrt_prod):
            if not np.allclose(np.diagonal(sqrt_prod).imag, 0, atol=1e-3):
                max_imag = np.max(np.abs(sqrt_prod.imag))
                raise ValueError("Imaginary component {}".format(max_imag))
            sqrt_prod = sqrt_prod.real

        return (
            mean_diff.dot(mean_diff)
            + np.trace(cov_a)
            + np.trace(cov_b)
            - 2 * np.trace(sqrt_prod)
        )
|
| 118 |
+
class Evaluator:
    """
    Computes evaluation metrics from InceptionV3 features: FID/sFID statistics,
    Inception score, and improved precision/recall, using a TF session.
    """

    def __init__(
        self,
        session,
        batch_size=64,
        softmax_batch_size=512,
    ):
        # :param session: TensorFlow session owning the inception graph.
        # :param batch_size: images per feature-extraction run.
        # :param softmax_batch_size: pool activations per softmax run.
        self.sess = session
        self.batch_size = batch_size
        self.softmax_batch_size = softmax_batch_size
        self.manifold_estimator = ManifoldEstimator(session)
        with self.sess.graph.as_default():
            # Placeholders: NHWC image batches and 2048-d pool_3 activations.
            self.image_input = tf.placeholder(tf.float32, shape=[None, None, None, 3])
            self.softmax_input = tf.placeholder(tf.float32, shape=[None, 2048])
            self.pool_features, self.spatial_features = _create_feature_graph(self.image_input)
            self.softmax = _create_softmax_graph(self.softmax_input)

    def warmup(self):
        """Run one dummy batch so graph setup cost is paid up front."""
        self.compute_activations(np.zeros([1, 8, 64, 64, 3]))

    def read_activations(self, npz_path: str) -> Tuple[np.ndarray, np.ndarray]:
        """Stream images out of an npz file and return their activations."""
        with open_npz_array(npz_path, "arr_0") as reader:
            return self.compute_activations(reader.read_batches(self.batch_size))

    def compute_activations(self, batches: Iterable[np.ndarray]) -> Tuple[np.ndarray, np.ndarray]:
        """
        Compute image features for downstream evals.

        :param batches: a iterator over NHWC numpy arrays in [0, 255].
        :return: a tuple of numpy arrays of shape [N x X], where X is a feature
                 dimension. The tuple is (pool_3, spatial).
        """
        preds = []
        spatial_preds = []
        for batch in tqdm(batches):
            batch = batch.astype(np.float32)
            pred, spatial_pred = self.sess.run(
                [self.pool_features, self.spatial_features], {self.image_input: batch}
            )
            # Flatten each sample's features to a single vector.
            preds.append(pred.reshape([pred.shape[0], -1]))
            spatial_preds.append(spatial_pred.reshape([spatial_pred.shape[0], -1]))
        return (
            np.concatenate(preds, axis=0),
            np.concatenate(spatial_preds, axis=0),
        )

    def read_statistics(
        self, npz_path: str, activations: Tuple[np.ndarray, np.ndarray]
    ) -> Tuple[FIDStatistics, FIDStatistics]:
        """
        Load precomputed (mu, sigma) pairs from the npz when present;
        otherwise derive them from the supplied activations.
        """
        obj = np.load(npz_path)
        if "mu" in list(obj.keys()):
            return FIDStatistics(obj["mu"], obj["sigma"]), FIDStatistics(
                obj["mu_s"], obj["sigma_s"]
            )
        return tuple(self.compute_statistics(x) for x in activations)

    def compute_statistics(self, activations: np.ndarray) -> FIDStatistics:
        """Mean and covariance of a set of activations."""
        mu = np.mean(activations, axis=0)
        sigma = np.cov(activations, rowvar=False)
        return FIDStatistics(mu, sigma)

    def compute_inception_score(self, activations: np.ndarray, split_size: int = 5000) -> float:
        """
        Inception score of pool_3 activations, averaged over splits of
        ``split_size`` samples.
        """
        softmax_out = []
        for i in range(0, len(activations), self.softmax_batch_size):
            acts = activations[i : i + self.softmax_batch_size]
            softmax_out.append(self.sess.run(self.softmax, feed_dict={self.softmax_input: acts}))
        preds = np.concatenate(softmax_out, axis=0)
        # https://github.com/openai/improved-gan/blob/4f5d1ec5c16a7eceb206f42bfc652693601e1d5c/inception_score/model.py#L46
        scores = []
        for i in range(0, len(preds), split_size):
            part = preds[i : i + split_size]
            kl = part * (np.log(part) - np.log(np.expand_dims(np.mean(part, 0), 0)))
            kl = np.mean(np.sum(kl, 1))
            scores.append(np.exp(kl))
        return float(np.mean(scores))

    def compute_prec_recall(
        self, activations_ref: np.ndarray, activations_sample: np.ndarray
    ) -> Tuple[float, float]:
        """
        Improved precision/recall of the sample manifold vs. the reference.

        :return: (precision, recall) floats for the first neighborhood size.
        """
        radii_1 = self.manifold_estimator.manifold_radii(activations_ref)
        radii_2 = self.manifold_estimator.manifold_radii(activations_sample)
        pr = self.manifold_estimator.evaluate_pr(
            activations_ref, radii_1, activations_sample, radii_2
        )
        return (float(pr[0][0]), float(pr[1][0]))
class ManifoldEstimator:
    """
    A helper for comparing manifolds of feature vectors.

    Adapted from https://github.com/kynkaat/improved-precision-and-recall-metric/blob/f60f25e5ad933a79135c783fcda53de30f42c9b9/precision_recall.py#L57
    """

    def __init__(
        self,
        session,
        row_batch_size=10000,
        col_batch_size=10000,
        nhood_sizes=(3,),
        clamp_to_percentile=None,
        eps=1e-5,
    ):
        """
        Estimate the manifold of given feature vectors.

        :param session: the TensorFlow session.
        :param row_batch_size: row batch size to compute pairwise distances
            (parameter to trade-off between memory usage and performance).
        :param col_batch_size: column batch size to compute pairwise distances.
        :param nhood_sizes: number of neighbors used to estimate the manifold.
        :param clamp_to_percentile: prune hyperspheres that have radius larger than
            the given percentile.
        :param eps: small number for numerical stability.
        """
        self.distance_block = DistanceBlock(session)
        self.row_batch_size = row_batch_size
        self.col_batch_size = col_batch_size
        self.nhood_sizes = nhood_sizes
        self.num_nhoods = len(nhood_sizes)
        self.clamp_to_percentile = clamp_to_percentile
        self.eps = eps

    def warmup(self):
        """Run evaluate_pr once on dummy data to trigger TF graph setup."""
        feats, radii = (
            np.zeros([1, 2048], dtype=np.float32),
            np.zeros([1, 1], dtype=np.float32),
        )
        self.evaluate_pr(feats, radii, feats, radii)

    def manifold_radii(self, features: np.ndarray) -> np.ndarray:
        """
        For each feature vector, compute the distance to its k-th nearest
        neighbor for every k in ``self.nhood_sizes``.

        :param features: [N x D] feature vectors.
        :return: [N x num_nhoods] radii (squared distances).
        """
        num_images = len(features)

        # Estimate manifold of features by calculating distances to k-NN of each sample.
        radii = np.zeros([num_images, self.num_nhoods], dtype=np.float32)
        distance_batch = np.zeros([self.row_batch_size, num_images], dtype=np.float32)
        seq = np.arange(max(self.nhood_sizes) + 1, dtype=np.int32)

        for begin1 in range(0, num_images, self.row_batch_size):
            end1 = min(begin1 + self.row_batch_size, num_images)
            row_batch = features[begin1:end1]

            for begin2 in range(0, num_images, self.col_batch_size):
                end2 = min(begin2 + self.col_batch_size, num_images)
                col_batch = features[begin2:end2]

                # Compute distances between batches.
                distance_batch[
                    0 : end1 - begin1, begin2:end2
                ] = self.distance_block.pairwise_distances(row_batch, col_batch)

            # Find the k-nearest neighbor from the current batch.
            radii[begin1:end1, :] = np.concatenate(
                [
                    x[:, self.nhood_sizes]
                    for x in _numpy_partition(distance_batch[0 : end1 - begin1, :], seq, axis=1)
                ],
                axis=0,
            )

        if self.clamp_to_percentile is not None:
            # Prune hyperspheres above the percentile by zeroing their radii.
            max_distances = np.percentile(radii, self.clamp_to_percentile, axis=0)
            radii[radii > max_distances] = 0
        return radii

    def evaluate(self, features: np.ndarray, radii: np.ndarray, eval_features: np.ndarray):
        """
        Evaluate if new feature vectors are at the manifold.
        """
        num_eval_images = eval_features.shape[0]
        num_ref_images = radii.shape[0]
        distance_batch = np.zeros([self.row_batch_size, num_ref_images], dtype=np.float32)
        batch_predictions = np.zeros([num_eval_images, self.num_nhoods], dtype=np.int32)
        max_realism_score = np.zeros([num_eval_images], dtype=np.float32)
        nearest_indices = np.zeros([num_eval_images], dtype=np.int32)

        for begin1 in range(0, num_eval_images, self.row_batch_size):
            end1 = min(begin1 + self.row_batch_size, num_eval_images)
            feature_batch = eval_features[begin1:end1]

            for begin2 in range(0, num_ref_images, self.col_batch_size):
                end2 = min(begin2 + self.col_batch_size, num_ref_images)
                ref_batch = features[begin2:end2]

                distance_batch[
                    0 : end1 - begin1, begin2:end2
                ] = self.distance_block.pairwise_distances(feature_batch, ref_batch)

            # From the minibatch of new feature vectors, determine if they are in the estimated manifold.
            # If a feature vector is inside a hypersphere of some reference sample, then
            # the new sample lies at the estimated manifold.
            # The radii of the hyperspheres are determined from distances of neighborhood size k.
            samples_in_manifold = distance_batch[0 : end1 - begin1, :, None] <= radii
            batch_predictions[begin1:end1] = np.any(samples_in_manifold, axis=1).astype(np.int32)

            max_realism_score[begin1:end1] = np.max(
                radii[:, 0] / (distance_batch[0 : end1 - begin1, :] + self.eps), axis=1
            )
            nearest_indices[begin1:end1] = np.argmin(distance_batch[0 : end1 - begin1, :], axis=1)

        return {
            "fraction": float(np.mean(batch_predictions)),
            "batch_predictions": batch_predictions,
            # NOTE: key typo ("realisim") kept for backward compatibility with callers.
            "max_realisim_score": max_realism_score,
            "nearest_indices": nearest_indices,
        }

    def evaluate_pr(
        self,
        features_1: np.ndarray,
        radii_1: np.ndarray,
        features_2: np.ndarray,
        radii_2: np.ndarray,
    ) -> Tuple[np.ndarray, np.ndarray]:
        """
        Evaluate precision and recall efficiently.

        :param features_1: [N1 x D] feature vectors for reference batch.
        :param radii_1: [N1 x K1] radii for reference vectors.
        :param features_2: [N2 x D] feature vectors for the other batch.
        :param radii_2: [N x K2] radii for other vectors.
        :return: a tuple of arrays for (precision, recall):
                 - precision: an np.ndarray of length K1
                 - recall: an np.ndarray of length K2
        """
        # BUGFIX: np.bool was a deprecated alias for the builtin bool and was
        # removed in NumPy 1.24; use the builtin dtype instead.
        features_1_status = np.zeros([len(features_1), radii_2.shape[1]], dtype=bool)
        features_2_status = np.zeros([len(features_2), radii_1.shape[1]], dtype=bool)
        for begin_1 in range(0, len(features_1), self.row_batch_size):
            end_1 = begin_1 + self.row_batch_size
            batch_1 = features_1[begin_1:end_1]
            for begin_2 in range(0, len(features_2), self.col_batch_size):
                end_2 = begin_2 + self.col_batch_size
                batch_2 = features_2[begin_2:end_2]
                batch_1_in, batch_2_in = self.distance_block.less_thans(
                    batch_1, radii_1[begin_1:end_1], batch_2, radii_2[begin_2:end_2]
                )
                features_1_status[begin_1:end_1] |= batch_1_in
                features_2_status[begin_2:end_2] |= batch_2_in
        return (
            np.mean(features_2_status.astype(np.float64), axis=0),
            np.mean(features_1_status.astype(np.float64), axis=0),
        )
class DistanceBlock:
    """
    Calculate pairwise distances between vectors.

    Adapted from https://github.com/kynkaat/improved-precision-and-recall-metric/blob/f60f25e5ad933a79135c783fcda53de30f42c9b9/precision_recall.py#L34
    """

    def __init__(self, session):
        self.session = session

        # Initialize TF graph to calculate pairwise distances.
        with session.graph.as_default():
            self._features_batch1 = tf.placeholder(tf.float32, shape=[None, None])
            self._features_batch2 = tf.placeholder(tf.float32, shape=[None, None])
            # Compute in float16 first (cheaper); fall back to a float32
            # recomputation when the half-precision result is non-finite.
            distance_block_16 = _batch_pairwise_distances(
                tf.cast(self._features_batch1, tf.float16),
                tf.cast(self._features_batch2, tf.float16),
            )
            self.distance_block = tf.cond(
                tf.reduce_all(tf.math.is_finite(distance_block_16)),
                lambda: tf.cast(distance_block_16, tf.float32),
                lambda: _batch_pairwise_distances(self._features_batch1, self._features_batch2),
            )

            # Extra logic for less thans.
            self._radii1 = tf.placeholder(tf.float32, shape=[None, None])
            self._radii2 = tf.placeholder(tf.float32, shape=[None, None])
            dist32 = tf.cast(self.distance_block, tf.float32)[..., None]
            # Per-row/column membership masks: is any distance within the radius?
            self._batch_1_in = tf.math.reduce_any(dist32 <= self._radii2, axis=1)
            self._batch_2_in = tf.math.reduce_any(dist32 <= self._radii1[:, None], axis=0)

    def pairwise_distances(self, U, V):
        """
        Evaluate pairwise distances between two batches of feature vectors.
        """
        return self.session.run(
            self.distance_block,
            feed_dict={self._features_batch1: U, self._features_batch2: V},
        )

    def less_thans(self, batch_1, radii_1, batch_2, radii_2):
        # Returns two boolean masks: for each vector, whether it lies inside
        # any hypersphere of the other batch (given the other batch's radii).
        return self.session.run(
            [self._batch_1_in, self._batch_2_in],
            feed_dict={
                self._features_batch1: batch_1,
                self._features_batch2: batch_2,
                self._radii1: radii_1,
                self._radii2: radii_2,
            },
        )
def _batch_pairwise_distances(U, V):
    """
    Compute pairwise distances between two batches of feature vectors.
    """
    # Returns squared Euclidean distances via the ||u||^2 - 2u.v + ||v||^2
    # expansion, clamped at zero to absorb floating-point cancellation.
    with tf.variable_scope("pairwise_dist_block"):
        # Squared norms of each row in U and V.
        norm_u = tf.reduce_sum(tf.square(U), 1)
        norm_v = tf.reduce_sum(tf.square(V), 1)

        # norm_u as a column and norm_v as a row vectors.
        norm_u = tf.reshape(norm_u, [-1, 1])
        norm_v = tf.reshape(norm_v, [1, -1])

        # Pairwise squared Euclidean distances.
        D = tf.maximum(norm_u - 2 * tf.matmul(U, V, False, True) + norm_v, 0.0)

    return D
class NpzArrayReader(ABC):
    """Abstract reader that yields an npz array in fixed-size batches."""

    @abstractmethod
    def read_batch(self, batch_size: int) -> Optional[np.ndarray]:
        """Return the next batch of up to ``batch_size`` rows, or None at EOF."""

    @abstractmethod
    def remaining(self) -> int:
        """Number of rows not yet consumed."""

    def read_batches(self, batch_size: int) -> Iterable[np.ndarray]:
        """Wrap ``read_batch`` into a sized iterable over all remaining rows."""

        def _drain():
            while True:
                chunk = self.read_batch(batch_size)
                if chunk is None:
                    return
                yield chunk

        # Ceiling division: a final partial batch still counts.
        total_batches = -(-self.remaining() // batch_size)
        return BatchIterator(_drain, total_batches)
class BatchIterator:
    """An iterable with a known length, backed by a generator factory."""

    def __init__(self, gen_fn, length):
        # gen_fn: zero-argument callable producing a fresh iterator per pass.
        # length: number of items gen_fn will yield, reported by __len__.
        self.gen_fn = gen_fn
        self.length = length

    def __len__(self):
        return self.length

    def __iter__(self):
        # Delegate to the factory so each iteration starts a new generator.
        return self.gen_fn()
class StreamingNpzArrayReader(NpzArrayReader):
    """Stream batches straight from an open ``.npy`` member without loading it all."""

    def __init__(self, arr_f, shape, dtype):
        # arr_f: file-like object positioned just past the .npy header.
        self.arr_f = arr_f
        self.shape = shape
        self.dtype = dtype
        self.idx = 0

    def read_batch(self, batch_size: int) -> Optional[np.ndarray]:
        """Read and decode the next batch of rows, or return None at EOF."""
        total_rows = self.shape[0]
        if self.idx >= total_rows:
            return None

        rows = min(batch_size, total_rows - self.idx)
        self.idx += rows

        # Zero-itemsize dtypes carry no bytes on disk; fabricate the array.
        if self.dtype.itemsize == 0:
            return np.ndarray([rows, *self.shape[1:]], dtype=self.dtype)

        item_count = rows * np.prod(self.shape[1:])
        raw = _read_bytes(self.arr_f, int(item_count * self.dtype.itemsize), "array data")
        return np.frombuffer(raw, dtype=self.dtype).reshape([rows, *self.shape[1:]])

    def remaining(self) -> int:
        return max(0, self.shape[0] - self.idx)
class MemoryNpzArrayReader(NpzArrayReader):
    """Serve batches out of an array already held fully in memory."""

    def __init__(self, arr):
        self.arr = arr
        self.idx = 0

    @classmethod
    def load(cls, path: str, arr_name: str):
        """Load ``arr_name`` from the npz at ``path`` into memory."""
        with open(path, "rb") as f:
            arr = np.load(f)[arr_name]
        return cls(arr)

    def read_batch(self, batch_size: int) -> Optional[np.ndarray]:
        if self.idx >= self.arr.shape[0]:
            return None

        start = self.idx
        self.idx = start + batch_size
        return self.arr[start : start + batch_size]

    def remaining(self) -> int:
        return max(0, self.arr.shape[0] - self.idx)
@contextmanager
def open_npz_array(path: str, arr_name: str) -> NpzArrayReader:
    """
    Yield a batch reader for array ``arr_name`` inside the npz at ``path``.

    Streams directly from disk when the .npy header allows it; otherwise
    falls back to loading the whole array into memory.
    """
    with _open_npy_file(path, arr_name) as arr_f:
        version = np.lib.format.read_magic(arr_f)
        header_reader = {
            (1, 0): np.lib.format.read_array_header_1_0,
            (2, 0): np.lib.format.read_array_header_2_0,
        }.get(version)
        if header_reader is None:
            # Unknown .npy format version: punt to a full in-memory load.
            yield MemoryNpzArrayReader.load(path, arr_name)
            return
        shape, is_fortran, dtype = header_reader(arr_f)
        if is_fortran or dtype.hasobject:
            # Fortran-ordered or object arrays can't be streamed row-wise.
            yield MemoryNpzArrayReader.load(path, arr_name)
        else:
            yield StreamingNpzArrayReader(arr_f, shape, dtype)
def _read_bytes(fp, size, error_template="ran out of data"):
    """
    Copied from: https://github.com/numpy/numpy/blob/fb215c76967739268de71aa4bda55dd1b062bc2e/numpy/lib/format.py#L788-L886

    Read from file-like object until size bytes are read.
    Raises ValueError if not EOF is encountered before size bytes are read.
    Non-blocking objects only supported if they derive from io objects.
    Required as e.g. ZipExtFile in python 2.6 can return less data than
    requested.
    """
    collected = b""
    while len(collected) < size:
        # io files (default in python3) return None or raise on
        # would-block; regular files can't be non-blocking.
        try:
            piece = fp.read(size - len(collected))
        except io.BlockingIOError:
            continue
        collected += piece
        if not piece:
            break  # EOF before we had enough bytes
    if len(collected) == size:
        return collected
    raise ValueError(
        "EOF: reading %s, expected %d bytes got %d" % (error_template, size, len(collected))
    )
@contextmanager
def _open_npy_file(path: str, arr_name: str):
    """Open the ``{arr_name}.npy`` member of the npz (zip) archive at ``path``."""
    member = f"{arr_name}.npy"
    with open(path, "rb") as f:
        with zipfile.ZipFile(f, "r") as zip_f:
            if member not in zip_f.namelist():
                raise ValueError(f"missing {arr_name} in npz file")
            with zip_f.open(member, "r") as arr_f:
                yield arr_f
def _download_inception_model():
    """Download the InceptionV3 graph to INCEPTION_V3_PATH unless already present."""
    if os.path.exists(INCEPTION_V3_PATH):
        return
    print("downloading InceptionV3 model...")
    # Stream into a temp file and rename atomically so a partial download
    # never masquerades as the finished model.
    partial_path = INCEPTION_V3_PATH + ".tmp"
    with requests.get(INCEPTION_V3_URL, stream=True) as resp:
        resp.raise_for_status()
        with open(partial_path, "wb") as out_f:
            for chunk in tqdm(resp.iter_content(chunk_size=8192)):
                out_f.write(chunk)
    os.rename(partial_path, INCEPTION_V3_PATH)
def _create_feature_graph(input_batch):
    """Import InceptionV3 and return (pool_3, spatial) feature tensors for ``input_batch``."""
    _download_inception_model()
    # Random prefix so repeated imports into the same graph don't collide.
    prefix = f"{random.randrange(2**32)}_{random.randrange(2**32)}"
    with open(INCEPTION_V3_PATH, "rb") as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
    pool3, spatial = tf.import_graph_def(
        graph_def,
        input_map={f"ExpandDims:0": input_batch},
        return_elements=[FID_POOL_NAME, FID_SPATIAL_NAME],
        name=prefix,
    )
    # Make the batch dimension dynamic on the imported ops.
    _update_shapes(pool3)
    # Only the first 7 spatial channels are used downstream (sFID convention).
    spatial = spatial[..., :7]
    return pool3, spatial
def _create_softmax_graph(input_batch):
    """Build a softmax over class logits computed from pool activations."""
    _download_inception_model()
    # Random prefix so repeated imports into the same graph don't collide.
    prefix = f"{random.randrange(2**32)}_{random.randrange(2**32)}"
    with open(INCEPTION_V3_PATH, "rb") as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
    (matmul,) = tf.import_graph_def(
        graph_def, return_elements=[f"softmax/logits/MatMul"], name=prefix
    )
    # Reuse the pretrained classifier weight matrix from the imported MatMul op.
    w = matmul.inputs[1]
    logits = tf.matmul(input_batch, w)
    return tf.nn.softmax(logits)
def _update_shapes(pool3):
    # https://github.com/bioinf-jku/TTUR/blob/73ab375cdf952a12686d9aa7978567771084da42/fid.py#L50-L63
    """Rewrite static shapes in the imported graph so the batch dim is dynamic."""
    ops = pool3.graph.get_operations()
    for op in ops:
        for o in op.outputs:
            shape = o.get_shape()
            if shape._dims is not None:  # pylint: disable=protected-access
                # shape = [s.value for s in shape] TF 1.x
                shape = [s for s in shape]  # TF 2.x
                new_shape = []
                for j, s in enumerate(shape):
                    # Replace a hard-coded batch size of 1 with None (variable).
                    if s == 1 and j == 0:
                        new_shape.append(None)
                    else:
                        new_shape.append(s)
                # NOTE(review): writes TF's private _shape_val directly —
                # fragile across TF versions; confirm when upgrading TF.
                o.__dict__["_shape_val"] = tf.TensorShape(new_shape)
    return pool3
def _numpy_partition(arr, kth, **kwargs):
    """
    Apply ``np.partition`` to chunks of ``arr`` in parallel worker threads.

    :param arr: sequence/array split row-wise across workers.
    :param kth: passed through to ``np.partition``.
    :return: a list of partitioned chunks; concatenating them restores
        the row order of ``arr``.
    """
    # Guard: the original code divided by num_workers and created a
    # ThreadPool(0) for empty input, both of which raise.
    if not len(arr):
        return []

    num_workers = min(cpu_count(), len(arr))
    chunk_size = len(arr) // num_workers
    extra = len(arr) % num_workers

    # Split into num_workers chunks; the first `extra` chunks get one extra row.
    start_idx = 0
    batches = []
    for i in range(num_workers):
        size = chunk_size + (1 if i < extra else 0)
        batches.append(arr[start_idx : start_idx + size])
        start_idx += size

    with ThreadPool(num_workers) as pool:
        return list(pool.map(partial(np.partition, kth=kth, **kwargs), batches))
# Script entry point; main() is defined earlier in this file.
if __name__ == "__main__":
    main()
f16c16/graph-data.py
ADDED
|
@@ -0,0 +1,169 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import matplotlib.pyplot as plt
import numpy as np


def _load_metrics(path):
    """
    Parse one results log and collect its metric series.

    Relevant lines look like ``Mean L2: 0.12`` or ``Mean Lpips: 0.34``; the
    value after the colon is appended in order of appearance. Every line is
    echoed to stdout (matching the original script's behavior).

    :param path: path to the text log to parse.
    :return: (mean_l2, mean_lpips) lists of floats.
    """
    mean_l2, mean_lpips = [], []
    with open(path, "r") as f:
        print("read")
        for line in f.readlines():
            print(line)
            if "Mean L2" in line:
                mean_l2.append(float(line.split(":")[1].strip()))
            elif "Mean Lpips" in line:
                mean_lpips.append(float(line.split(":")[1].strip()))
    return mean_l2, mean_lpips


# Noise levels on the x-axis: 0.00, 0.01, ..., 0.99.
noises = [float(number) for number in np.arange(0.00, 1.0, 0.01)]

# One (L2, LPIPS) series per experiment log; the copy-pasted reader blocks
# were consolidated into _load_metrics.
mean_l2, mean_lpips = _load_metrics("./1e-4.txt")
mean_l2_2, mean_lpips_2 = _load_metrics("./1e-5.txt")
mean_l2_3, mean_lpips_3 = _load_metrics("./2e-5.txt")
mean_l2_4, mean_lpips_4 = _load_metrics("./1e-6.txt")
mean_l2_5, mean_lpips_5 = _load_metrics("./pl600.txt")
mean_l2_6, mean_lpips_6 = _load_metrics("./100pl.txt")
mean_l2_7, mean_lpips_7 = _load_metrics("./300pl.txt")
mean_l2_8, mean_lpips_8 = _load_metrics("./1e-6_asym.txt")

plt.figure(figsize=(10, 6))

# Truncate every series to the first `do` noise levels.
do = 100
mean_lpips = mean_lpips[0:do]
mean_lpips_2 = mean_lpips_2[0:do]
mean_lpips_3 = mean_lpips_3[0:do]
mean_lpips_4 = mean_lpips_4[0:do]
mean_lpips_5 = mean_lpips_5[0:do]
mean_lpips_6 = mean_lpips_6[0:do]
mean_lpips_7 = mean_lpips_7[0:do]
mean_lpips_8 = mean_lpips_8[0:do]
noises = noises[0:do]

# Plot Mean Lpips (the Mean L2 plots are intentionally disabled).
plt.plot(noises, mean_lpips, label='Mean Lpips 1e-4', marker='s', linestyle='--', color='r')
plt.plot(noises, mean_lpips_3, label='Mean Lpips 2e-5', marker='s', linestyle='--', color='b')
plt.plot(noises, mean_lpips_2, label='Mean Lpips 1e-5', marker='s', linestyle='--', color='g')
plt.plot(noises, mean_lpips_4, label='Mean Lpips 1e-6', marker='s', linestyle='--', color='y')
plt.plot(noises, mean_lpips_8, label='Mean Lpips 1e-6asym', marker='s', linestyle='--')
# plt.plot(noises, mean_lpips_5, label='Mean Lpips PL600', marker='s', linestyle='--')
# plt.plot(noises, mean_lpips_6, label='Mean Lpips Pl100', marker='s', linestyle='--')
plt.plot(noises, mean_lpips_7, label='Mean Lpips Pl300', marker='s', linestyle='--')

# Labels and title
plt.xlabel('Noise Level')
plt.ylabel('Value')
plt.title('Mean L2 and Mean Lpips vs. Noise Level')

# Show grid
plt.grid(True)

# Add legend
plt.legend()

# Show the plot
plt.show()
f16c16/kl_test.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import jax
import jax.numpy as jnp


# Scratch experiment: compare the KL term computed from per-sample means
# against the element-wise version, with unit variance (logvars == 0).
key = jax.random.PRNGKey(0)
x = jax.random.normal(key, (2, 32, 32, 4))
print(x.mean())

# Per-sample means over the spatial/channel axes.
means = jnp.mean(x, axis=[1, 2, 3])
# So this gives us the means of each individual one, cool
print(means)

logvars = 0.0

print("square of means shit", jnp.square(means))
print(means)

# KL using only the per-sample means (reduces over no axes: means is 1-D).
reduce_axes = tuple(range(1, means.ndim))
kl_loss = -0.5 * jnp.sum(1 + logvars - jnp.square(means) - jnp.exp(logvars), axis=reduce_axes)
print(kl_loss)
print(jnp.mean(kl_loss))

print("x mean again", x.mean())
print(x)
print(jnp.square(x))

# KL computed element-wise over each sample, then averaged across the batch.
kl_loss = -0.5 * jnp.sum(
    1 + logvars - jnp.square(x) - jnp.exp(logvars), axis=tuple(range(1, x.ndim))
)
print(kl_loss)
print(jnp.mean(kl_loss))
f16c16/latent_distances.py
ADDED
|
@@ -0,0 +1,293 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
try: # For debugging
|
| 2 |
+
from localutils.debugger import enable_debug
|
| 3 |
+
enable_debug()
|
| 4 |
+
except ImportError:
|
| 5 |
+
pass
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
#import jax
|
| 9 |
+
#jax.config.update('jax_platform_name', 'cpu')
|
| 10 |
+
import os
|
| 11 |
+
# os.environ["JAX_PLATFORMS"] = 'cpu'
|
| 12 |
+
import jax
|
| 13 |
+
import lpips
|
| 14 |
+
|
| 15 |
+
loss_fn_alex = lpips.LPIPS(net='alex') # best forward scores
|
| 16 |
+
loss_fn_alex = loss_fn_alex.cuda()
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
import numpy as np
|
| 20 |
+
import flax.linen as nn
|
| 21 |
+
import jax.numpy as jnp
|
| 22 |
+
from absl import app, flags
|
| 23 |
+
from functools import partial
|
| 24 |
+
import numpy as np
|
| 25 |
+
import tqdm
|
| 26 |
+
import flax
|
| 27 |
+
import optax
|
| 28 |
+
import wandb
|
| 29 |
+
from ml_collections import config_flags
|
| 30 |
+
#import elements
|
| 31 |
+
import ml_collections
|
| 32 |
+
import tensorflow_datasets as tfds
|
| 33 |
+
import tensorflow as tf
|
| 34 |
+
tf.config.set_visible_devices([], "GPU")
|
| 35 |
+
tf.config.set_visible_devices([], "TPU")
|
| 36 |
+
import matplotlib.pyplot as plt
|
| 37 |
+
from typing import Any
|
| 38 |
+
|
| 39 |
+
from utils.train_state import TrainState, target_update
|
| 40 |
+
from utils.checkpoint import Checkpoint
|
| 41 |
+
from utils.fid import get_fid_network, fid_from_stats
|
| 42 |
+
|
| 43 |
+
from train import VQGANModel
|
| 44 |
+
from models.vqvae import VQVAE
|
| 45 |
+
from models.discriminator import Discriminator
|
| 46 |
+
|
| 47 |
+
from PIL import Image
|
| 48 |
+
import torch
|
| 49 |
+
|
| 50 |
+
delattr(flags.FLAGS, 'dataset_name')
|
| 51 |
+
delattr(flags.FLAGS, 'load_dir')
|
| 52 |
+
delattr(flags.FLAGS, 'batch_size')
|
| 53 |
+
|
| 54 |
+
FLAGS = flags.FLAGS
|
| 55 |
+
flags.DEFINE_string('dataset_name', 'imagenet256', 'Environment name.')
|
| 56 |
+
flags.DEFINE_string('load_dir', "/home/dkaplan/Downloads/Models/checkpoint(1).tmp", 'Load dir (if not None, load params from here).')
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
flags.DEFINE_integer('batch_size', 2, 'Total Batch size.')
|
| 60 |
+
# Flags are inhereited from train.py, so pass your model parameters again here to evaluate.
|
| 61 |
+
|
| 62 |
+
import gc
|
| 63 |
+
|
| 64 |
+
def main(_):
|
| 65 |
+
device_count = len(jax.local_devices())
|
| 66 |
+
global_device_count = jax.device_count()
|
| 67 |
+
local_batch_size = FLAGS.batch_size // (global_device_count // device_count)
|
| 68 |
+
|
| 69 |
+
def get_dataset(is_train):
|
| 70 |
+
if 'imagenet' in FLAGS.dataset_name:
|
| 71 |
+
def deserialization_fn(data):
|
| 72 |
+
image = data['image']
|
| 73 |
+
min_side = tf.minimum(tf.shape(image)[0], tf.shape(image)[1])
|
| 74 |
+
image = tf.image.resize_with_crop_or_pad(image, min_side, min_side)
|
| 75 |
+
if 'imagenet256' in FLAGS.dataset_name:
|
| 76 |
+
image = tf.image.resize(image, (256, 256))
|
| 77 |
+
elif 'imagenet128' in FLAGS.dataset_name:
|
| 78 |
+
image = tf.image.resize(image, (128, 128))
|
| 79 |
+
else:
|
| 80 |
+
raise ValueError(f"Unknown dataset {FLAGS.dataset_name}")
|
| 81 |
+
if is_train:
|
| 82 |
+
image = tf.image.random_flip_left_right(image)
|
| 83 |
+
image = tf.cast(image, tf.float32) / 255.0
|
| 84 |
+
return image
|
| 85 |
+
|
| 86 |
+
split = tfds.split_for_jax_process('train' if is_train else 'validation', drop_remainder=True)
|
| 87 |
+
dataset = tfds.load('imagenet2012', data_dir="/data/inet", split=split)
|
| 88 |
+
dataset = dataset.map(deserialization_fn, num_parallel_calls=tf.data.AUTOTUNE)
|
| 89 |
+
dataset = dataset.shuffle(10000, seed=42, reshuffle_each_iteration=True)
|
| 90 |
+
dataset = dataset.batch(local_batch_size)
|
| 91 |
+
dataset = dataset.prefetch(tf.data.AUTOTUNE)
|
| 92 |
+
dataset = tfds.as_numpy(dataset)
|
| 93 |
+
dataset = iter(dataset)
|
| 94 |
+
return dataset
|
| 95 |
+
else:
|
| 96 |
+
raise ValueError(f"Unknown dataset {FLAGS.dataset_name}")
|
| 97 |
+
|
| 98 |
+
dataset = get_dataset(is_train=True)
|
| 99 |
+
dataset_valid = get_dataset(is_train=False)
|
| 100 |
+
|
| 101 |
+
|
| 102 |
+
# image = Image.open("osman.png")
|
| 103 |
+
# image = np.array(image) / 255.0
|
| 104 |
+
# print(image)
|
| 105 |
+
# image = jnp.array(image)
|
| 106 |
+
# image = jnp.expand_dims(image, 0)
|
| 107 |
+
# image = jnp.expand_dims(image, 0)
|
| 108 |
+
|
| 109 |
+
example_obs = next(dataset)[:1]
|
| 110 |
+
|
| 111 |
+
#Reconstruction loop
|
| 112 |
+
# image = model.reconstruction(image)
|
| 113 |
+
# image = image[0,0,:,:,:]
|
| 114 |
+
# image = (image * 255).astype(np.uint8)
|
| 115 |
+
# image = np.array(image)
|
| 116 |
+
# img = Image.fromarray(image)
|
| 117 |
+
# img.save("osman" + str(i) + ".png")
|
| 118 |
+
|
| 119 |
+
|
| 120 |
+
rng = jax.random.PRNGKey(FLAGS.seed)
|
| 121 |
+
rng, param_key = jax.random.split(rng)
|
| 122 |
+
print("Total devices", jax.local_devices()[0])
|
| 123 |
+
|
| 124 |
+
|
| 125 |
+
###################################
|
| 126 |
+
# Creating Model and put on devices.
|
| 127 |
+
###################################
|
| 128 |
+
FLAGS.model.image_channels = example_obs.shape[-1]
|
| 129 |
+
FLAGS.model.image_size = example_obs.shape[1]
|
| 130 |
+
vqvae_def = VQVAE(FLAGS.model, train=True)
|
| 131 |
+
vqvae_params = vqvae_def.init({'params': param_key, 'noise': param_key}, example_obs)['params']
|
| 132 |
+
# tx = optax.adam(learning_rate=FLAGS.model['lr'], b1=FLAGS.model['beta1'], b2=FLAGS.model['beta2'])
|
| 133 |
+
vqvae_ts = TrainState.create(vqvae_def, vqvae_params)#, tx=tx) #Turning off tx because we don't need it...
|
| 134 |
+
vqvae_def_eps = VQVAE(FLAGS.model, train=False)
|
| 135 |
+
vqvae_eps_ts = TrainState.create(vqvae_def_eps, vqvae_params)
|
| 136 |
+
print("Total num of VQVAE parameters:", sum(x.size for x in jax.tree_util.tree_leaves(vqvae_params)))
|
| 137 |
+
|
| 138 |
+
discriminator_def = Discriminator(FLAGS.model)
|
| 139 |
+
discriminator_params = discriminator_def.init(param_key, example_obs)['params']
|
| 140 |
+
# tx = optax.adam(learning_rate=FLAGS.model['lr'], b1=FLAGS.model['beta1'], b2=FLAGS.model['beta2'])
|
| 141 |
+
discriminator_ts = TrainState.create(discriminator_def, discriminator_params)#, tx=tx)#No tx again
|
| 142 |
+
print("Total num of Discriminator parameters:", sum(x.size for x in jax.tree_util.tree_leaves(discriminator_params)))
|
| 143 |
+
|
| 144 |
+
model = VQGANModel(rng=rng, vqvae=vqvae_ts, vqvae_eps=vqvae_eps_ts, discriminator=discriminator_ts, config=FLAGS.model)
|
| 145 |
+
|
| 146 |
+
assert FLAGS.load_dir is not None
|
| 147 |
+
cp = Checkpoint(FLAGS.load_dir)
|
| 148 |
+
model = cp.load_model(model)
|
| 149 |
+
print("Loaded model with step", model.vqvae.step)
|
| 150 |
+
|
| 151 |
+
model = flax.jax_utils.replicate(model, devices=jax.local_devices())
|
| 152 |
+
jax.debug.visualize_array_sharding(model.vqvae.params['decoder']['Conv_0']['bias'])
|
| 153 |
+
#print(model.vqvae)
|
| 154 |
+
|
| 155 |
+
|
| 156 |
+
####################################
|
| 157 |
+
# Noise stuff
|
| 158 |
+
###################################
|
| 159 |
+
|
| 160 |
+
cpus = jax.devices("cpu")
|
| 161 |
+
|
| 162 |
+
#So there are a few ways to calculate PPL here
|
| 163 |
+
#We could take two images in image space
|
| 164 |
+
#Walk between them and check the LPIPS in the output space
|
| 165 |
+
#...actually that's basically it right?
|
| 166 |
+
#We could also do the walk in latent space, which is the same, but with ?? scaling
|
| 167 |
+
|
| 168 |
+
#Let's see if they are any different.
|
| 169 |
+
|
| 170 |
+
|
| 171 |
+
#We could also try taking a latent, going X/2 direction, and -X/2 direction, and seeing that.
|
| 172 |
+
i = 0
|
| 173 |
+
lpips_list = []
|
| 174 |
+
means = []
|
| 175 |
+
stds = []
|
| 176 |
+
for valid_images in dataset_valid:
|
| 177 |
+
|
| 178 |
+
|
| 179 |
+
valid_images = valid_images.reshape((len(jax.local_devices()), -1, *valid_images.shape[1:])) # [devices, batch//devices, etc..]
|
| 180 |
+
#1, 2, 256, 256, 3
|
| 181 |
+
#Given our 2 images, we want to lerp between them...
|
| 182 |
+
#We want to lerp once to point t, and once to point t + eps
|
| 183 |
+
#And then we want to get the LPIPS between those two images
|
| 184 |
+
#And then we calculate LPIPS
|
| 185 |
+
#And then we divide by eps squared, and done.
|
| 186 |
+
|
| 187 |
+
|
| 188 |
+
reconstructed_images, decoded, std, latents = model.latent_distances(valid_images) # [devices, 8, 256, 256, 3]
|
| 189 |
+
|
| 190 |
+
|
| 191 |
+
means.append(latents.mean())
|
| 192 |
+
stds.append(latents.std())
|
| 193 |
+
# print("std", std.mean())
|
| 194 |
+
print("latent mean", latents.mean())
|
| 195 |
+
print("actual latent std", latents.std())
|
| 196 |
+
|
| 197 |
+
#Need to change images back to -1,1
|
| 198 |
+
|
| 199 |
+
reconstructed_images = reconstructed_images * 2 - 1
|
| 200 |
+
decoded = decoded * 2 -1
|
| 201 |
+
|
| 202 |
+
#1,2,256,256,3
|
| 203 |
+
reconstructed_images = jnp.swapaxes(reconstructed_images, 0, 4)
|
| 204 |
+
decoded = jnp.swapaxes(decoded, 0, 4)
|
| 205 |
+
|
| 206 |
+
reconstructed_images = jnp.swapaxes(reconstructed_images, 0, 1)
|
| 207 |
+
decoded = jnp.swapaxes(decoded, 0, 1)
|
| 208 |
+
|
| 209 |
+
reconstructed_images = jnp.squeeze(reconstructed_images)
|
| 210 |
+
decoded = jnp.squeeze(decoded)
|
| 211 |
+
|
| 212 |
+
#So here, we want to put them on CPU and delete the original
|
| 213 |
+
|
| 214 |
+
|
| 215 |
+
image_np = np.asarray(reconstructed_images)
|
| 216 |
+
image_np_2 = torch.from_numpy(np.copy(image_np)).cuda()
|
| 217 |
+
|
| 218 |
+
decoded_np = np.asarray(decoded)
|
| 219 |
+
decoded_np_2 = torch.from_numpy(np.copy(decoded_np)).cuda()
|
| 220 |
+
|
| 221 |
+
|
| 222 |
+
|
| 223 |
+
lpips_loss = loss_fn_alex(image_np_2, decoded_np_2)
|
| 224 |
+
lpips_cpu = lpips_loss.detach().cpu().squeeze().mean()
|
| 225 |
+
lpips_cpu = lpips_cpu / (.0001 ** 2)
|
| 226 |
+
|
| 227 |
+
print(lpips_cpu)
|
| 228 |
+
lpips_list.append(lpips_cpu)
|
| 229 |
+
|
| 230 |
+
|
| 231 |
+
i += 1
|
| 232 |
+
#
|
| 233 |
+
if i == 500:
|
| 234 |
+
break
|
| 235 |
+
|
| 236 |
+
|
| 237 |
+
mean_lpips = jnp.mean(jnp.asarray(lpips_list))
|
| 238 |
+
print(mean_lpips)
|
| 239 |
+
print("mean of means", jnp.asarray(means).mean())
|
| 240 |
+
print("stds of means", jnp.asarray(means).std())
|
| 241 |
+
print("mean of stds", jnp.asarray(stds).mean())
|
| 242 |
+
print("std of stds", jnp.asarray(stds).std())
|
| 243 |
+
|
| 244 |
+
|
| 245 |
+
#actual ae sym
|
| 246 |
+
# mean of means 0.35234922
|
| 247 |
+
# stds of means 0.4036692
|
| 248 |
+
# mean of stds 2.6363409
|
| 249 |
+
# std of stds 0.30666474
|
| 250 |
+
|
| 251 |
+
|
| 252 |
+
#1e-6:
|
| 253 |
+
#mean of means -0.018107202
|
| 254 |
+
# stds of means 0.11694455
|
| 255 |
+
# mean of stds 1.0860059
|
| 256 |
+
# std of stds 0.09732369
|
| 257 |
+
|
| 258 |
+
#1e-5:
|
| 259 |
+
# mean of means 0.0065166513
|
| 260 |
+
# stds of means 0.06983645
|
| 261 |
+
# mean of stds 0.9855982
|
| 262 |
+
# std of stds 0.05810356
|
| 263 |
+
|
| 264 |
+
#1e-4:
|
| 265 |
+
# mean of means 0.0065882676
|
| 266 |
+
# stds of means 0.042861093
|
| 267 |
+
# mean of stds 0.7608507
|
| 268 |
+
# std of stds 0.05846726
|
| 269 |
+
|
| 270 |
+
|
| 271 |
+
#pl300
|
| 272 |
+
# mean of means 0.090131655
|
| 273 |
+
# stds of means 0.69894844
|
| 274 |
+
# mean of stds 5.5634923
|
| 275 |
+
# std of stds 0.6767279
|
| 276 |
+
|
| 277 |
+
|
| 278 |
+
#pl100
|
| 279 |
+
# mean of means 0.16227543
|
| 280 |
+
# stds of means 0.53616405
|
| 281 |
+
# mean of stds 4.4914503
|
| 282 |
+
# std of stds 0.6015057
|
| 283 |
+
|
| 284 |
+
|
| 285 |
+
|
| 286 |
+
#Maybe we want to do "std multiplied PPL"? smoo
|
| 287 |
+
|
| 288 |
+
#Grab the STD of the Lpips
|
| 289 |
+
|
| 290 |
+
|
| 291 |
+
|
| 292 |
+
if __name__ == '__main__':
|
| 293 |
+
app.run(main)
|
f16c16/make_samples.py
ADDED
|
@@ -0,0 +1,205 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
try: # For debugging
|
| 2 |
+
from localutils.debugger import enable_debug
|
| 3 |
+
enable_debug()
|
| 4 |
+
except ImportError:
|
| 5 |
+
pass
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
#import jax
|
| 9 |
+
#jax.config.update('jax_platform_name', 'cpu')
|
| 10 |
+
import os
|
| 11 |
+
|
| 12 |
+
# os.environ["JAX_PLATFORMS"] = 'cpu'
|
| 13 |
+
|
| 14 |
+
import jax
|
| 15 |
+
|
| 16 |
+
import flax.linen as nn
|
| 17 |
+
import jax.numpy as jnp
|
| 18 |
+
from absl import app, flags
|
| 19 |
+
from functools import partial
|
| 20 |
+
import numpy as np
|
| 21 |
+
import tqdm
|
| 22 |
+
import flax
|
| 23 |
+
import optax
|
| 24 |
+
import wandb
|
| 25 |
+
from ml_collections import config_flags
|
| 26 |
+
#import elements
|
| 27 |
+
import ml_collections
|
| 28 |
+
import tensorflow_datasets as tfds
|
| 29 |
+
import tensorflow as tf
|
| 30 |
+
tf.config.set_visible_devices([], "GPU")
|
| 31 |
+
tf.config.set_visible_devices([], "TPU")
|
| 32 |
+
import matplotlib.pyplot as plt
|
| 33 |
+
from typing import Any
|
| 34 |
+
|
| 35 |
+
from utils.train_state import TrainState, target_update
|
| 36 |
+
from utils.checkpoint import Checkpoint
|
| 37 |
+
from utils.fid import get_fid_network, fid_from_stats
|
| 38 |
+
|
| 39 |
+
from train import VQGANModel
|
| 40 |
+
from models.vqvae import VQVAE
|
| 41 |
+
from models.discriminator import Discriminator
|
| 42 |
+
|
| 43 |
+
from PIL import Image
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
delattr(flags.FLAGS, 'dataset_name')
|
| 47 |
+
delattr(flags.FLAGS, 'load_dir')
|
| 48 |
+
delattr(flags.FLAGS, 'batch_size')
|
| 49 |
+
|
| 50 |
+
FLAGS = flags.FLAGS
|
| 51 |
+
flags.DEFINE_string('dataset_name', 'imagenet256', 'Environment name.')
|
| 52 |
+
flags.DEFINE_string('load_dir', "/home/dkaplan/Documents/LiClipse Workspace/VAE/jax-vqvae-vqgan/7e-5_sdlike_sym/checkpoint.tmp", 'Load dir (if not None, load params from here).')
|
| 53 |
+
flags.DEFINE_integer('batch_size', 16, 'Total Batch size.')
|
| 54 |
+
# Flags are inhereited from train.py, so pass your model parameters again here to evaluate.
|
| 55 |
+
|
| 56 |
+
def main(_):
|
| 57 |
+
device_count = len(jax.local_devices())
|
| 58 |
+
global_device_count = jax.device_count()
|
| 59 |
+
local_batch_size = FLAGS.batch_size // (global_device_count // device_count)
|
| 60 |
+
|
| 61 |
+
def get_dataset(is_train):
|
| 62 |
+
if 'imagenet' in FLAGS.dataset_name:
|
| 63 |
+
def deserialization_fn(data):
|
| 64 |
+
image = data['image']
|
| 65 |
+
min_side = tf.minimum(tf.shape(image)[0], tf.shape(image)[1])
|
| 66 |
+
image = tf.image.resize_with_crop_or_pad(image, min_side, min_side)
|
| 67 |
+
if 'imagenet256' in FLAGS.dataset_name:
|
| 68 |
+
image = tf.image.resize(image, (256, 256))
|
| 69 |
+
elif 'imagenet128' in FLAGS.dataset_name:
|
| 70 |
+
image = tf.image.resize(image, (128, 128))
|
| 71 |
+
else:
|
| 72 |
+
raise ValueError(f"Unknown dataset {FLAGS.dataset_name}")
|
| 73 |
+
if is_train:
|
| 74 |
+
image_flip = tf.image.flip_left_right(image)
|
| 75 |
+
image_flip = tf.cast(image_flip, tf.float32) / 255.0
|
| 76 |
+
image = tf.cast(image, tf.float32) / 255.0
|
| 77 |
+
return image, image_flip, data["label"]
|
| 78 |
+
image = tf.cast(image, tf.float32) / 255.0
|
| 79 |
+
return image
|
| 80 |
+
|
| 81 |
+
split = tfds.split_for_jax_process('train' if is_train else 'validation', drop_remainder=True)
|
| 82 |
+
dataset = tfds.load('imagenet2012', data_dir="/data/inet", split=split)
|
| 83 |
+
dataset = dataset.map(deserialization_fn, num_parallel_calls=tf.data.AUTOTUNE)
|
| 84 |
+
dataset = dataset.shuffle(10000, seed=42, reshuffle_each_iteration=True)
|
| 85 |
+
dataset = dataset.batch(local_batch_size)
|
| 86 |
+
dataset = dataset.prefetch(tf.data.AUTOTUNE)
|
| 87 |
+
dataset = tfds.as_numpy(dataset)
|
| 88 |
+
dataset = iter(dataset)
|
| 89 |
+
return dataset
|
| 90 |
+
else:
|
| 91 |
+
raise ValueError(f"Unknown dataset {FLAGS.dataset_name}")
|
| 92 |
+
|
| 93 |
+
dataset = get_dataset(is_train=True)
|
| 94 |
+
dataset_valid = get_dataset(is_train=False)
|
| 95 |
+
|
| 96 |
+
example_obs = next(dataset)[0][:1]
|
| 97 |
+
|
| 98 |
+
get_fid_activations = get_fid_network()
|
| 99 |
+
truth_fid_stats = np.load('data/imagenet256_fidstats_openai.npz')
|
| 100 |
+
|
| 101 |
+
rng = jax.random.PRNGKey(FLAGS.seed)
|
| 102 |
+
rng, param_key = jax.random.split(rng)
|
| 103 |
+
print("Total devices", jax.local_devices()[0])
|
| 104 |
+
|
| 105 |
+
|
| 106 |
+
###################################
|
| 107 |
+
# Creating Model and put on devices.
|
| 108 |
+
###################################
|
| 109 |
+
FLAGS.model.image_channels = example_obs.shape[-1]
|
| 110 |
+
FLAGS.model.image_size = example_obs.shape[1]
|
| 111 |
+
vqvae_def = VQVAE(FLAGS.model, train=True)
|
| 112 |
+
vqvae_params = vqvae_def.init({'params': param_key, 'noise': param_key}, example_obs)['params']
|
| 113 |
+
tx = optax.adam(learning_rate=FLAGS.model['lr'], b1=FLAGS.model['beta1'], b2=FLAGS.model['beta2'])
|
| 114 |
+
vqvae_ts = TrainState.create(vqvae_def, vqvae_params, tx=tx)
|
| 115 |
+
vqvae_def_eps = VQVAE(FLAGS.model, train=False)
|
| 116 |
+
vqvae_eps_ts = TrainState.create(vqvae_def_eps, vqvae_params)
|
| 117 |
+
print("Total num of VQVAE parameters:", sum(x.size for x in jax.tree_util.tree_leaves(vqvae_params)))
|
| 118 |
+
|
| 119 |
+
discriminator_def = Discriminator(FLAGS.model)
|
| 120 |
+
discriminator_params = discriminator_def.init(param_key, example_obs)['params']
|
| 121 |
+
tx = optax.adam(learning_rate=FLAGS.model['lr'], b1=FLAGS.model['beta1'], b2=FLAGS.model['beta2'])
|
| 122 |
+
discriminator_ts = TrainState.create(discriminator_def, discriminator_params, tx=tx)
|
| 123 |
+
print("Total num of Discriminator parameters:", sum(x.size for x in jax.tree_util.tree_leaves(discriminator_params)))
|
| 124 |
+
|
| 125 |
+
model = VQGANModel(rng=rng, vqvae=vqvae_ts, vqvae_eps=vqvae_eps_ts, discriminator=discriminator_ts, config=FLAGS.model)
|
| 126 |
+
|
| 127 |
+
assert FLAGS.load_dir is not None
|
| 128 |
+
cp = Checkpoint(FLAGS.load_dir)
|
| 129 |
+
model = cp.load_model(model)
|
| 130 |
+
print("Loaded model with step", model.vqvae.step)
|
| 131 |
+
|
| 132 |
+
model = flax.jax_utils.replicate(model, devices=jax.local_devices())
|
| 133 |
+
jax.debug.visualize_array_sharding(model.vqvae.params['decoder']['Conv_0']['bias'])
|
| 134 |
+
#print(model.vqvae)
|
| 135 |
+
|
| 136 |
+
|
| 137 |
+
####################################
|
| 138 |
+
# FID Evaluation.
|
| 139 |
+
###################################
|
| 140 |
+
|
| 141 |
+
i = 0
|
| 142 |
+
for valid_images, image_flip, label in dataset:#dataset_valid:
|
| 143 |
+
|
| 144 |
+
valid_images = valid_images.reshape((len(jax.local_devices()), -1, *valid_images.shape[1:])) # [devices, batch//devices, etc..]
|
| 145 |
+
valid_reconstructed_images = model.reconstruction(valid_images) # [devices, 8, 256, 256, 3]
|
| 146 |
+
|
| 147 |
+
|
| 148 |
+
#load up custom image
|
| 149 |
+
# image = Image.open("osman.png")
|
| 150 |
+
# image = np.array(image) / 255.0
|
| 151 |
+
# print(image)
|
| 152 |
+
# image = jnp.array(image)
|
| 153 |
+
# image = jnp.expand_dims(image, 0)
|
| 154 |
+
# image = jnp.expand_dims(image, 0)
|
| 155 |
+
#Try saving the image off the bat
|
| 156 |
+
# image_orig =
|
| 157 |
+
|
| 158 |
+
|
| 159 |
+
# image = model.reconstruction(image)
|
| 160 |
+
# image = image[0,0,:,:,:]
|
| 161 |
+
# image = (image * 255).astype(np.uint8)
|
| 162 |
+
# image = np.array(image)
|
| 163 |
+
# img = Image.fromarray(image)
|
| 164 |
+
# img.save("osman" + str(i) + ".png")
|
| 165 |
+
# exit()
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
#Whatever...
|
| 169 |
+
#top left mine
|
| 170 |
+
#Bottom right SD
|
| 171 |
+
|
| 172 |
+
# fig, axs = plt.subplots(2, 2, figsize=(30, 15))
|
| 173 |
+
|
| 174 |
+
# axs[0, 0].imshow(valid_images[0, 0], vmin=0, vmax=1)
|
| 175 |
+
# axs[1, 0].imshow(valid_reconstructed_images[0, 0], vmin=0, vmax=1)
|
| 176 |
+
# axs[0, 1].imshow
|
| 177 |
+
|
| 178 |
+
# plt.savefig("img.jpg")
|
| 179 |
+
|
| 180 |
+
image = valid_images[0,0,:,:,:]
|
| 181 |
+
image = (image * 255).astype(np.uint8)
|
| 182 |
+
img = Image.fromarray(image)
|
| 183 |
+
img.save("original" + str(i) + ".png")
|
| 184 |
+
|
| 185 |
+
image2 = valid_reconstructed_images[0,0,:,:,:]
|
| 186 |
+
image2 = (image2 * 255).astype(np.uint8)
|
| 187 |
+
image2 = np.array(image2)
|
| 188 |
+
image2 = Image.fromarray(image2)
|
| 189 |
+
|
| 190 |
+
image2.save("recon" + str(i) + ".png")
|
| 191 |
+
|
| 192 |
+
|
| 193 |
+
i += 1
|
| 194 |
+
|
| 195 |
+
if i == 6:
|
| 196 |
+
exit()
|
| 197 |
+
|
| 198 |
+
|
| 199 |
+
|
| 200 |
+
|
| 201 |
+
|
| 202 |
+
# images.append((valid_reconstructed_images*255).astype(np.uint8))
|
| 203 |
+
|
| 204 |
+
if __name__ == '__main__':
|
| 205 |
+
app.run(main)
|
f16c16/models/__pycache__/discriminator.cpython-310.pyc
ADDED
|
Binary file (4.68 kB). View file
|
|
|
f16c16/models/__pycache__/discriminator.cpython-312.pyc
ADDED
|
Binary file (8.13 kB). View file
|
|
|
f16c16/models/__pycache__/vqvae.cpython-310.pyc
ADDED
|
Binary file (14.7 kB). View file
|
|
|
f16c16/models/__pycache__/vqvae.cpython-312.pyc
ADDED
|
Binary file (26.9 kB). View file
|
|
|
f16c16/models/back_model.py
ADDED
|
@@ -0,0 +1,343 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Any
|
| 2 |
+
import flax.linen as nn
|
| 3 |
+
import jax.numpy as jnp
|
| 4 |
+
import functools
|
| 5 |
+
import ml_collections
|
| 6 |
+
import jax
|
| 7 |
+
|
| 8 |
+
###########################
|
| 9 |
+
### Helper Modules
|
| 10 |
+
### https://github.com/google-research/maskgit/blob/main/maskgit/nets/layers.py
|
| 11 |
+
###########################
|
| 12 |
+
|
| 13 |
+
def get_norm_layer(norm_type):
|
| 14 |
+
"""Normalization layer."""
|
| 15 |
+
if norm_type == 'BN':
|
| 16 |
+
raise NotImplementedError
|
| 17 |
+
elif norm_type == 'LN':
|
| 18 |
+
norm_fn = functools.partial(nn.LayerNorm)
|
| 19 |
+
elif norm_type == 'GN':
|
| 20 |
+
norm_fn = functools.partial(nn.GroupNorm)
|
| 21 |
+
else:
|
| 22 |
+
raise NotImplementedError
|
| 23 |
+
return norm_fn
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def tensorflow_style_avg_pooling(x, window_shape, strides, padding: str):
|
| 27 |
+
pool_sum = jax.lax.reduce_window(x, 0.0, jax.lax.add,
|
| 28 |
+
(1,) + window_shape + (1,),
|
| 29 |
+
(1,) + strides + (1,), padding)
|
| 30 |
+
pool_denom = jax.lax.reduce_window(
|
| 31 |
+
jnp.ones_like(x), 0.0, jax.lax.add, (1,) + window_shape + (1,),
|
| 32 |
+
(1,) + strides + (1,), padding)
|
| 33 |
+
return pool_sum / pool_denom
|
| 34 |
+
|
| 35 |
+
def upsample(x, factor=2):
|
| 36 |
+
n, h, w, c = x.shape
|
| 37 |
+
x = jax.image.resize(x, (n, h * factor, w * factor, c), method='nearest')
|
| 38 |
+
return x
|
| 39 |
+
|
| 40 |
+
def dsample(x):
|
| 41 |
+
return tensorflow_style_avg_pooling(x, (2, 2), strides=(2, 2), padding='same')
|
| 42 |
+
|
| 43 |
+
def squared_euclidean_distance(a: jnp.ndarray,
|
| 44 |
+
b: jnp.ndarray,
|
| 45 |
+
b2: jnp.ndarray = None) -> jnp.ndarray:
|
| 46 |
+
"""Computes the pairwise squared Euclidean distance.
|
| 47 |
+
|
| 48 |
+
Args:
|
| 49 |
+
a: float32: (n, d): An array of points.
|
| 50 |
+
b: float32: (m, d): An array of points.
|
| 51 |
+
b2: float32: (d, m): b square transpose.
|
| 52 |
+
|
| 53 |
+
Returns:
|
| 54 |
+
d: float32: (n, m): Where d[i, j] is the squared Euclidean distance between
|
| 55 |
+
a[i] and b[j].
|
| 56 |
+
"""
|
| 57 |
+
if b2 is None:
|
| 58 |
+
b2 = jnp.sum(b.T**2, axis=0, keepdims=True)
|
| 59 |
+
a2 = jnp.sum(a**2, axis=1, keepdims=True)
|
| 60 |
+
ab = jnp.matmul(a, b.T)
|
| 61 |
+
d = a2 - 2 * ab + b2
|
| 62 |
+
return d
|
| 63 |
+
|
| 64 |
+
def entropy_loss_fn(affinity, loss_type="softmax", temperature=1.0):
|
| 65 |
+
"""Calculates the entropy loss. Affinity is the similarity/distance matrix."""
|
| 66 |
+
flat_affinity = affinity.reshape(-1, affinity.shape[-1])
|
| 67 |
+
flat_affinity /= temperature
|
| 68 |
+
probs = jax.nn.softmax(flat_affinity, axis=-1)
|
| 69 |
+
log_probs = jax.nn.log_softmax(flat_affinity + 1e-5, axis=-1)
|
| 70 |
+
if loss_type == "softmax":
|
| 71 |
+
target_probs = probs
|
| 72 |
+
elif loss_type == "argmax":
|
| 73 |
+
codes = jnp.argmax(flat_affinity, axis=-1)
|
| 74 |
+
onehots = jax.nn.one_hot(
|
| 75 |
+
codes, flat_affinity.shape[-1], dtype=flat_affinity.dtype)
|
| 76 |
+
onehots = probs - jax.lax.stop_gradient(probs - onehots)
|
| 77 |
+
target_probs = onehots
|
| 78 |
+
else:
|
| 79 |
+
raise ValueError("Entropy loss {} not supported".format(loss_type))
|
| 80 |
+
avg_probs = jnp.mean(target_probs, axis=0)
|
| 81 |
+
avg_entropy = -jnp.sum(avg_probs * jnp.log(avg_probs + 1e-5))
|
| 82 |
+
sample_entropy = -jnp.mean(jnp.sum(target_probs * log_probs, axis=-1))
|
| 83 |
+
loss = sample_entropy - avg_entropy
|
| 84 |
+
return loss
|
| 85 |
+
|
| 86 |
+
def sg(x):
|
| 87 |
+
return jax.lax.stop_gradient(x)
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
###########################
|
| 93 |
+
### Modules
|
| 94 |
+
###########################
|
| 95 |
+
|
| 96 |
+
class ResBlock(nn.Module):
|
| 97 |
+
"""Basic Residual Block."""
|
| 98 |
+
filters: int
|
| 99 |
+
norm_fn: Any
|
| 100 |
+
activation_fn: Any
|
| 101 |
+
|
| 102 |
+
@nn.compact
|
| 103 |
+
def __call__(self, x):
|
| 104 |
+
input_dim = x.shape[-1]
|
| 105 |
+
residual = x
|
| 106 |
+
x = self.norm_fn()(x)
|
| 107 |
+
x = self.activation_fn(x)
|
| 108 |
+
x = nn.Conv(self.filters, kernel_size=(3, 3), use_bias=False)(x)
|
| 109 |
+
x = self.norm_fn()(x)
|
| 110 |
+
x = self.activation_fn(x)
|
| 111 |
+
x = nn.Conv(self.filters, kernel_size=(3, 3), use_bias=False)(x)
|
| 112 |
+
|
| 113 |
+
if input_dim != self.filters:
|
| 114 |
+
residual = nn.Conv(self.filters, kernel_size=(1, 1), use_bias=False)(x)
|
| 115 |
+
return x + residual
|
| 116 |
+
|
| 117 |
+
class Encoder(nn.Module):
|
| 118 |
+
"""From [H,W,D] image to [H',W',D'] embedding. Using Conv layers."""
|
| 119 |
+
config: ml_collections.ConfigDict
|
| 120 |
+
|
| 121 |
+
def setup(self):
|
| 122 |
+
self.filters = self.config.filters
|
| 123 |
+
self.num_res_blocks = self.config.num_res_blocks
|
| 124 |
+
self.channel_multipliers = self.config.channel_multipliers
|
| 125 |
+
self.embedding_dim = self.config.embedding_dim
|
| 126 |
+
self.norm_type = self.config.norm_type
|
| 127 |
+
self.activation_fn = nn.swish
|
| 128 |
+
|
| 129 |
+
@nn.compact
|
| 130 |
+
def __call__(self, x):
|
| 131 |
+
print("Initializing encoder.")
|
| 132 |
+
norm_fn = get_norm_layer(norm_type=self.norm_type)
|
| 133 |
+
block_args = dict(norm_fn=norm_fn, activation_fn=self.activation_fn)
|
| 134 |
+
print("Incoming encoder shape", x.shape)
|
| 135 |
+
x = nn.Conv(self.filters, kernel_size=(3, 3), use_bias=False)(x)
|
| 136 |
+
print('Encoder layer', x.shape)
|
| 137 |
+
num_blocks = len(self.channel_multipliers)
|
| 138 |
+
for i in range(num_blocks):
|
| 139 |
+
filters = self.filters * self.channel_multipliers[i]
|
| 140 |
+
for _ in range(self.num_res_blocks):
|
| 141 |
+
x = ResBlock(filters, **block_args)(x)
|
| 142 |
+
if i < num_blocks - 1:
|
| 143 |
+
x = dsample(x)
|
| 144 |
+
print('Encoder layer', x.shape)
|
| 145 |
+
|
| 146 |
+
for _ in range(self.num_res_blocks):
|
| 147 |
+
x = ResBlock(filters, **block_args)(x)
|
| 148 |
+
print('Encoder layer', x.shape)
|
| 149 |
+
x = norm_fn()(x)
|
| 150 |
+
x = self.activation_fn(x)
|
| 151 |
+
last_dim = self.embedding_dim*2 if self.config['quantizer_type'] == 'kl' else self.embedding_dim
|
| 152 |
+
x = nn.Conv(last_dim, kernel_size=(1, 1))(x)
|
| 153 |
+
print("Before final", x.shape)
|
| 154 |
+
x = nn.Conv(8, kernel_size=(1,1))(x)
|
| 155 |
+
print("Final embeddings are size", x.shape)
|
| 156 |
+
return x
|
| 157 |
+
|
| 158 |
+
class Decoder(nn.Module):
|
| 159 |
+
"""From [H',W',D'] embedding to [H,W,D] embedding. Using Conv layers."""
|
| 160 |
+
|
| 161 |
+
config: ml_collections.ConfigDict
|
| 162 |
+
|
| 163 |
+
def setup(self):
|
| 164 |
+
self.filters = self.config.filters
|
| 165 |
+
self.num_res_blocks = self.config.num_res_blocks
|
| 166 |
+
self.channel_multipliers = self.config.channel_multipliers
|
| 167 |
+
self.norm_type = self.config.norm_type
|
| 168 |
+
self.image_channels = self.config.image_channels
|
| 169 |
+
self.activation_fn = nn.swish
|
| 170 |
+
|
| 171 |
+
@nn.compact
|
| 172 |
+
def __call__(self, x):
|
| 173 |
+
norm_fn = get_norm_layer(norm_type=self.norm_type)
|
| 174 |
+
block_args = dict(norm_fn=norm_fn, activation_fn=self.activation_fn,)
|
| 175 |
+
num_blocks = len(self.channel_multipliers)
|
| 176 |
+
filters = self.filters * self.channel_multipliers[-1]
|
| 177 |
+
print("Decoder incoming shape", x.shape)
|
| 178 |
+
|
| 179 |
+
#We don't need to do anything here because it'll put it back to 512
|
| 180 |
+
|
| 181 |
+
x = nn.Conv(filters, kernel_size=(3, 3), use_bias=True)(x)
|
| 182 |
+
print("Decoder input", x.shape)
|
| 183 |
+
|
| 184 |
+
for _ in range(self.num_res_blocks):
|
| 185 |
+
x = ResBlock(filters, **block_args)(x)
|
| 186 |
+
print('Decoder layer', x.shape)
|
| 187 |
+
for i in reversed(range(num_blocks)):
|
| 188 |
+
filters = self.filters * self.channel_multipliers[i]
|
| 189 |
+
for _ in range(self.num_res_blocks):
|
| 190 |
+
x = ResBlock(filters, **block_args)(x)
|
| 191 |
+
if i > 0:
|
| 192 |
+
x = upsample(x, 2)
|
| 193 |
+
x = nn.Conv(filters, kernel_size=(3, 3))(x)
|
| 194 |
+
print('Decoder layer', x.shape)
|
| 195 |
+
x = norm_fn()(x)
|
| 196 |
+
x = self.activation_fn(x)
|
| 197 |
+
x = nn.Conv(self.image_channels, kernel_size=(3, 3))(x)
|
| 198 |
+
return x
|
| 199 |
+
|
| 200 |
+
class VectorQuantizer(nn.Module):
|
| 201 |
+
"""Basic vector quantizer."""
|
| 202 |
+
config: ml_collections.ConfigDict
|
| 203 |
+
train: bool
|
| 204 |
+
|
| 205 |
+
@nn.compact
|
| 206 |
+
def __call__(self, x):
|
| 207 |
+
codebook_size = self.config.codebook_size
|
| 208 |
+
emb_dim = x.shape[-1]
|
| 209 |
+
codebook = self.param(
|
| 210 |
+
"codebook",
|
| 211 |
+
jax.nn.initializers.variance_scaling(scale=1.0, mode="fan_in", distribution="uniform"),
|
| 212 |
+
(codebook_size, emb_dim))
|
| 213 |
+
codebook = jnp.asarray(codebook) # (codebook_size, emb_dim)
|
| 214 |
+
distances = jnp.reshape(
|
| 215 |
+
squared_euclidean_distance(jnp.reshape(x, (-1, emb_dim)), codebook),
|
| 216 |
+
x.shape[:-1] + (codebook_size,)) # [x, codebook_size] similarity matrix.
|
| 217 |
+
encoding_indices = jnp.argmin(distances, axis=-1)
|
| 218 |
+
encoding_onehot = jax.nn.one_hot(encoding_indices, codebook_size)
|
| 219 |
+
quantized = self.quantize(encoding_onehot)
|
| 220 |
+
result_dict = dict()
|
| 221 |
+
if self.train:
|
| 222 |
+
e_latent_loss = jnp.mean((sg(quantized) - x)**2) * self.config.commitment_cost
|
| 223 |
+
q_latent_loss = jnp.mean((quantized - sg(x))**2)
|
| 224 |
+
entropy_loss = 0.0
|
| 225 |
+
if self.config.entropy_loss_ratio != 0:
|
| 226 |
+
entropy_loss = entropy_loss_fn(
|
| 227 |
+
-distances,
|
| 228 |
+
loss_type=self.config.entropy_loss_type,
|
| 229 |
+
temperature=self.config.entropy_temperature
|
| 230 |
+
) * self.config.entropy_loss_ratio
|
| 231 |
+
e_latent_loss = jnp.asarray(e_latent_loss, jnp.float32)
|
| 232 |
+
q_latent_loss = jnp.asarray(q_latent_loss, jnp.float32)
|
| 233 |
+
entropy_loss = jnp.asarray(entropy_loss, jnp.float32)
|
| 234 |
+
loss = e_latent_loss + q_latent_loss + entropy_loss
|
| 235 |
+
result_dict = dict(
|
| 236 |
+
quantizer_loss=loss,
|
| 237 |
+
e_latent_loss=e_latent_loss,
|
| 238 |
+
q_latent_loss=q_latent_loss,
|
| 239 |
+
entropy_loss=entropy_loss)
|
| 240 |
+
quantized = x + jax.lax.stop_gradient(quantized - x)
|
| 241 |
+
|
| 242 |
+
result_dict.update({
|
| 243 |
+
"z_ids": encoding_indices,
|
| 244 |
+
})
|
| 245 |
+
return quantized, result_dict
|
| 246 |
+
|
| 247 |
+
def quantize(self, encoding_onehot: jnp.ndarray) -> jnp.ndarray:
|
| 248 |
+
codebook = jnp.asarray(self.variables["params"]["codebook"])
|
| 249 |
+
return jnp.dot(encoding_onehot, codebook)
|
| 250 |
+
|
| 251 |
+
def decode_ids(self, ids: jnp.ndarray) -> jnp.ndarray:
|
| 252 |
+
codebook = self.variables["params"]["codebook"]
|
| 253 |
+
return jnp.take(codebook, ids, axis=0)
|
| 254 |
+
|
| 255 |
+
class KLQuantizer(nn.Module):
    """VAE-style Gaussian 'quantizer' with the reparameterization trick.

    The incoming feature's last dimension is split in half: the first half
    is interpreted as means, the second half as log-variances.
    """
    config: ml_collections.ConfigDict
    train: bool

    @nn.compact
    def __call__(self, x):
        half = x.shape[-1] // 2
        means = x[..., :half]
        logvars = x[..., half:]
        if not self.train:
            # Eval path: deterministic latent (the mean), no KL term.
            return means, dict()
        # Train path: sample z = mu + sigma * eps and penalize with the KL
        # divergence to a standard normal.
        eps = jax.random.normal(self.make_rng("noise"), means.shape)
        sigma = jnp.exp(0.5 * logvars)
        z = means + sigma * eps
        kl_loss = -0.5 * jnp.mean(1 + logvars - means**2 - jnp.exp(logvars))
        return z, dict(quantizer_loss=kl_loss)
|
| 274 |
+
|
| 275 |
+
class FSQuantizer(nn.Module):
    """Finite Scalar Quantization: round each latent channel to a fixed grid.

    Returns the straight-through quantized latent plus diagnostics
    (`z_ids`: the integer grid values; `usage`: flat-index histogram).
    """
    config: ml_collections.ConfigDict
    train: bool

    @nn.compact
    def __call__(self, x):
        levels = self.config['fsq_levels']
        assert levels % 2 == 1, "FSQ levels must be odd."
        half = levels // 2
        z = jnp.tanh(x)                      # squash to [-1, 1]
        z = z * (levels - 1) / 2             # scale to [-half, half]
        zhat = jnp.round(z)                  # integer grid, e.g. -2..2
        # Straight-through rounding: forward uses zhat, gradient flows via z.
        quantized = z + jax.lax.stop_gradient(zhat - z)
        quantized = quantized / half         # back to [-1, 1], but discrete

        # Codebook-usage diagnostics: treat each channel tuple as a digit
        # string base `levels` and histogram the resulting flat index.
        dim = x.shape[-1]
        shifted = zhat + half                # 0..levels-1 per channel
        basis = jnp.concatenate(
            (jnp.array([1]),
             jnp.cumprod(jnp.array([levels] * (dim - 1))))).astype(jnp.uint32)
        flat_ids = (shifted * basis).sum(axis=-1).astype(jnp.uint32).reshape(-1)
        usage = jnp.bincount(flat_ids, length=levels ** dim)

        return quantized, {"z_ids": zhat, "usage": usage}
|
| 301 |
+
|
| 302 |
+
class VQVAE(nn.Module):
    """VQVAE model: encoder -> quantizer ('vq' | 'kl' | 'fsq') -> decoder."""
    config: ml_collections.ConfigDict
    train: bool

    def setup(self):
        """Instantiate the encoder, decoder, and the configured quantizer."""
        quantizer_type = self.config['quantizer_type']
        if quantizer_type == 'vq':
            self.quantizer = VectorQuantizer(config=self.config, train=self.train)
        elif quantizer_type == 'kl':
            self.quantizer = KLQuantizer(config=self.config, train=self.train)
        elif quantizer_type == 'fsq':
            self.quantizer = FSQuantizer(config=self.config, train=self.train)
        else:
            # Fail fast on a typo instead of an AttributeError at first use
            # (previously self.quantizer was silently left undefined).
            raise NotImplementedError(
                f"Unknown quantizer_type: {quantizer_type!r}")
        self.encoder = Encoder(config=self.config)
        self.decoder = Decoder(config=self.config)

    def encode(self, image):
        """Encode an image and quantize the embedding.

        Returns (quantized latent, auxiliary result dict from the quantizer).
        """
        encoded_feature = self.encoder(image)
        quantized, result_dict = self.quantizer(encoded_feature)
        print("After quant", quantized.shape)
        return quantized, result_dict

    def decode(self, z_vectors):
        """Decode latent vectors back to image space."""
        print("z_vectors shape", z_vectors.shape)
        reconstructed = self.decoder(z_vectors)
        return reconstructed

    def decode_from_indices(self, z_ids):
        """Decode from discrete codebook ids (only meaningful for 'vq')."""
        z_vectors = self.quantizer.decode_ids(z_ids)
        reconstructed_image = self.decode(z_vectors)
        return reconstructed_image

    def encode_to_indices(self, image):
        """Encode an image to discrete codebook ids."""
        encoded_feature = self.encoder(image)
        _, result_dict = self.quantizer(encoded_feature)
        ids = result_dict["z_ids"]
        return ids

    def __call__(self, input_dict):
        quantized, result_dict = self.encode(input_dict)
        outputs = self.decoder(quantized)
        return outputs, result_dict
|
f16c16/models/discriminator.py
ADDED
|
@@ -0,0 +1,123 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Discriminator from StyleGAN. https://github.com/google-research/maskgit/blob/main/maskgit/nets/discriminator.py"""
|
| 2 |
+
|
| 3 |
+
import functools
|
| 4 |
+
import math
|
| 5 |
+
from typing import Any, Tuple
|
| 6 |
+
import flax.linen as nn
|
| 7 |
+
from flax.linen.initializers import xavier_uniform
|
| 8 |
+
import jax
|
| 9 |
+
from jax import lax
|
| 10 |
+
import jax.numpy as jnp
|
| 11 |
+
import ml_collections
|
| 12 |
+
|
| 13 |
+
default_kernel_init = xavier_uniform()
|
| 14 |
+
|
| 15 |
+
def _conv_dimension_numbers(input_shape):
|
| 16 |
+
"""Computes the dimension numbers based on the input shape."""
|
| 17 |
+
ndim = len(input_shape)
|
| 18 |
+
lhs_spec = (0, ndim - 1) + tuple(range(1, ndim - 1))
|
| 19 |
+
rhs_spec = (ndim - 1, ndim - 2) + tuple(range(0, ndim - 2))
|
| 20 |
+
out_spec = lhs_spec
|
| 21 |
+
return lax.ConvDimensionNumbers(lhs_spec, rhs_spec, out_spec)
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
class BlurPool2D(nn.Module):
|
| 25 |
+
"""A layer to do channel-wise blurring + subsampling on 2D inputs.
|
| 26 |
+
|
| 27 |
+
Reference:
|
| 28 |
+
Zhang et al. Making Convolutional Networks Shift-Invariant Again.
|
| 29 |
+
https://arxiv.org/pdf/1904.11486.pdf.
|
| 30 |
+
"""
|
| 31 |
+
filter_size: int = 4
|
| 32 |
+
strides: Tuple[int, int] = (2, 2)
|
| 33 |
+
padding: str = 'SAME'
|
| 34 |
+
|
| 35 |
+
def setup(self):
|
| 36 |
+
if self.filter_size == 3:
|
| 37 |
+
self.filter = [1., 2., 1.]
|
| 38 |
+
elif self.filter_size == 4:
|
| 39 |
+
self.filter = [1., 3., 3., 1.]
|
| 40 |
+
elif self.filter_size == 5:
|
| 41 |
+
self.filter = [1., 4., 6., 4., 1.]
|
| 42 |
+
elif self.filter_size == 6:
|
| 43 |
+
self.filter = [1., 5., 10., 10., 5., 1.]
|
| 44 |
+
elif self.filter_size == 7:
|
| 45 |
+
self.filter = [1., 6., 15., 20., 15., 6., 1.]
|
| 46 |
+
else:
|
| 47 |
+
raise ValueError('Only filter_size of 3, 4, 5, 6 or 7 is supported.')
|
| 48 |
+
|
| 49 |
+
self.filter = jnp.array(self.filter, dtype=jnp.float32)
|
| 50 |
+
self.filter = self.filter[:, None] * self.filter[None, :]
|
| 51 |
+
with jax.default_matmul_precision('float32'):
|
| 52 |
+
self.filter /= jnp.sum(self.filter)
|
| 53 |
+
self.filter = jnp.reshape(
|
| 54 |
+
self.filter, [self.filter.shape[0], self.filter.shape[1], 1, 1])
|
| 55 |
+
|
| 56 |
+
@nn.compact
|
| 57 |
+
def __call__(self, inputs):
|
| 58 |
+
channel_num = inputs.shape[-1]
|
| 59 |
+
dimension_numbers = _conv_dimension_numbers(inputs.shape)
|
| 60 |
+
depthwise_filter = jnp.tile(self.filter, [1, 1, 1, channel_num])
|
| 61 |
+
with jax.default_matmul_precision('float32'):
|
| 62 |
+
outputs = lax.conv_general_dilated(inputs, depthwise_filter, self.strides,
|
| 63 |
+
self.padding, feature_group_count=channel_num, dimension_numbers=dimension_numbers)
|
| 64 |
+
return outputs
|
| 65 |
+
|
| 66 |
+
class ResBlock(nn.Module):
|
| 67 |
+
"""StyleGAN ResBlock for D.
|
| 68 |
+
|
| 69 |
+
https://github.com/rosinality/stylegan2-pytorch/blob/master/model.py#L618
|
| 70 |
+
"""
|
| 71 |
+
filters: int
|
| 72 |
+
activation_fn: Any
|
| 73 |
+
|
| 74 |
+
@nn.compact
|
| 75 |
+
def __call__(self, x):
|
| 76 |
+
input_dim = x.shape[-1]
|
| 77 |
+
residual = x
|
| 78 |
+
x = nn.Conv(input_dim, (3, 3), kernel_init=default_kernel_init)(x)
|
| 79 |
+
x = self.activation_fn(x)
|
| 80 |
+
x = BlurPool2D(filter_size=4)(x)
|
| 81 |
+
residual = BlurPool2D(filter_size=4)(residual)
|
| 82 |
+
residual = nn.Conv(self.filters, (1, 1), use_bias=False, kernel_init=default_kernel_init)(residual)
|
| 83 |
+
x = nn.Conv(self.filters, (3, 3), kernel_init=default_kernel_init)(x)
|
| 84 |
+
x = self.activation_fn(x)
|
| 85 |
+
out = (residual + x) / math.sqrt(2)
|
| 86 |
+
return out
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
class Discriminator(nn.Module):
|
| 90 |
+
"""StyleGAN Discriminator."""
|
| 91 |
+
config: ml_collections.ConfigDict
|
| 92 |
+
|
| 93 |
+
def setup(self):
|
| 94 |
+
self.input_size = self.config.image_size
|
| 95 |
+
self.activation_fn = functools.partial(jax.nn.leaky_relu, negative_slope=0.2)
|
| 96 |
+
self.channel_multiplier = 1
|
| 97 |
+
|
| 98 |
+
@nn.compact
|
| 99 |
+
def __call__(self, x):
|
| 100 |
+
filters = {
|
| 101 |
+
4: 512,
|
| 102 |
+
8: 512,
|
| 103 |
+
16: 512,
|
| 104 |
+
32: 512,
|
| 105 |
+
64: 256 * self.channel_multiplier,
|
| 106 |
+
128: 128 * self.channel_multiplier,
|
| 107 |
+
256: 64 * self.channel_multiplier,
|
| 108 |
+
512: 32 * self.channel_multiplier,
|
| 109 |
+
1024: 16 * self.channel_multiplier,
|
| 110 |
+
}
|
| 111 |
+
x = nn.Conv(filters[self.input_size], (3, 3), kernel_init=default_kernel_init)(x)
|
| 112 |
+
x = self.activation_fn(x)
|
| 113 |
+
log_size = int(math.log2(self.input_size))
|
| 114 |
+
for i in range(log_size, 2, -1):
|
| 115 |
+
x = ResBlock(filters[2**(i - 1)], self.activation_fn)(x)
|
| 116 |
+
print("Disc shape", x.shape)
|
| 117 |
+
x = nn.Conv(filters[4], (3, 3), kernel_init=default_kernel_init)(x)
|
| 118 |
+
x = self.activation_fn(x)
|
| 119 |
+
x = x.reshape((x.shape[0], -1))
|
| 120 |
+
x = nn.Dense(filters[4], kernel_init=default_kernel_init)(x)
|
| 121 |
+
x = self.activation_fn(x)
|
| 122 |
+
x = nn.Dense(1, kernel_init=default_kernel_init)(x)
|
| 123 |
+
return x
|
f16c16/models/vqvae.py
ADDED
|
@@ -0,0 +1,527 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Any
|
| 2 |
+
import flax.linen as nn
|
| 3 |
+
import jax.numpy as jnp
|
| 4 |
+
import functools
|
| 5 |
+
import ml_collections
|
| 6 |
+
import jax
|
| 7 |
+
|
| 8 |
+
from flax.linen import initializers
|
| 9 |
+
|
| 10 |
+
###########################
|
| 11 |
+
### Helper Modules
|
| 12 |
+
### https://github.com/google-research/maskgit/blob/main/maskgit/nets/layers.py
|
| 13 |
+
###########################
|
| 14 |
+
|
| 15 |
+
def get_norm_layer(norm_type):
    """Map a norm-type string to a flax normalization-layer factory.

    Supported: 'LN' (LayerNorm) and 'GN' (GroupNorm). 'BN' is recognized but
    intentionally unsupported; both it and unknown strings raise
    NotImplementedError, matching the original behavior.
    """
    if norm_type == 'LN':
        return functools.partial(nn.LayerNorm)
    if norm_type == 'GN':
        return functools.partial(nn.GroupNorm)
    raise NotImplementedError
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def tensorflow_style_avg_pooling(x, window_shape, strides, padding: str):
    """Average pooling over NHWC input, dividing by the actual window coverage.

    Unlike a naive sum / window-area, the denominator counts only the valid
    elements in each window, which matches TensorFlow's SAME-padding average
    pooling at the borders.
    """
    full_window = (1,) + window_shape + (1,)
    full_strides = (1,) + strides + (1,)
    window_sum = jax.lax.reduce_window(
        x, 0.0, jax.lax.add, full_window, full_strides, padding)
    window_count = jax.lax.reduce_window(
        jnp.ones_like(x), 0.0, jax.lax.add, full_window, full_strides, padding)
    return window_sum / window_count
|
| 36 |
+
|
| 37 |
+
def upsample(x, factor=2):
    """Nearest-neighbor upsample of an NHWC tensor by `factor` per spatial dim."""
    n, h, w, c = x.shape
    target_shape = (n, h * factor, w * factor, c)
    return jax.image.resize(x, target_shape, method='nearest')
|
| 41 |
+
|
| 42 |
+
def dsample(x):
    """Halve the spatial resolution of an NHWC tensor via 2x2 average pooling."""
    return tensorflow_style_avg_pooling(
        x, window_shape=(2, 2), strides=(2, 2), padding='same')
|
| 44 |
+
|
| 45 |
+
def squared_euclidean_distance(a: jnp.ndarray,
                               b: jnp.ndarray,
                               b2: jnp.ndarray = None) -> jnp.ndarray:
    """Compute the pairwise squared Euclidean distance between two point sets.

    Uses the expansion ||a - b||^2 = ||a||^2 - 2 a.b + ||b||^2 so the whole
    computation is one matmul plus two row-norm reductions.

    Args:
      a: float32 (n, d) array of points.
      b: float32 (m, d) array of points.
      b2: optional precomputed (1, m) row of squared norms of b.

    Returns:
      (n, m) array where entry [i, j] is ||a[i] - b[j]||^2.
    """
    if b2 is None:
        b2 = jnp.sum(b.T ** 2, axis=0, keepdims=True)
    a_sq = jnp.sum(a ** 2, axis=1, keepdims=True)
    cross = jnp.matmul(a, b.T)
    return a_sq - 2 * cross + b2
|
| 65 |
+
|
| 66 |
+
def entropy_loss_fn(affinity, loss_type="softmax", temperature=1.0):
    """Entropy regularizer over codebook assignments.

    Encourages each sample to commit to a single code (low per-sample entropy)
    while spreading usage across the whole codebook (high average entropy).

    Args:
      affinity: [..., codebook_size] similarity logits (higher = closer).
      loss_type: "softmax" uses soft assignments; "argmax" uses hard one-hot
        assignments with a straight-through gradient.
      temperature: softmax temperature applied to the logits.

    Returns:
      Scalar loss = sample_entropy - avg_entropy.
    """
    logits = affinity.reshape(-1, affinity.shape[-1])
    logits = logits / temperature
    probs = jax.nn.softmax(logits, axis=-1)
    log_probs = jax.nn.log_softmax(logits + 1e-5, axis=-1)
    if loss_type == "softmax":
        target_probs = probs
    elif loss_type == "argmax":
        codes = jnp.argmax(logits, axis=-1)
        onehots = jax.nn.one_hot(codes, logits.shape[-1], dtype=logits.dtype)
        # Straight-through: forward pass sees the one-hots, gradients flow
        # through the soft probabilities.
        target_probs = probs - jax.lax.stop_gradient(probs - onehots)
    else:
        raise ValueError("Entropy loss {} not supported".format(loss_type))
    avg_probs = jnp.mean(target_probs, axis=0)
    avg_entropy = -jnp.sum(avg_probs * jnp.log(avg_probs + 1e-5))
    sample_entropy = -jnp.mean(jnp.sum(target_probs * log_probs, axis=-1))
    return sample_entropy - avg_entropy
|
| 87 |
+
|
| 88 |
+
def sg(x):
    """Shorthand for jax.lax.stop_gradient."""
    return jax.lax.stop_gradient(x)
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
###########################
|
| 95 |
+
### Modules
|
| 96 |
+
###########################
|
| 97 |
+
|
| 98 |
+
class ResBlock(nn.Module):
    """Basic residual block: two pre-activation (norm -> act -> conv) stages.

    When the input channel count differs from `filters`, the skip path is
    projected with a 1x1 conv so the shapes match for the addition.
    """
    filters: int
    norm_fn: Any
    activation_fn: Any

    @nn.compact
    def __call__(self, x):
        input_dim = x.shape[-1]
        residual = x
        x = self.norm_fn()(x)
        x = self.activation_fn(x)
        x = nn.Conv(self.filters, kernel_size=(3, 3), use_bias=False)(x)
        x = self.norm_fn()(x)
        x = self.activation_fn(x)
        x = nn.Conv(self.filters, kernel_size=(3, 3), use_bias=False)(x)

        if input_dim != self.filters:
            # BUG FIX: project the *skip* tensor, not the main branch.
            # Previously this read `nn.Conv(...)(x)`, which made the output
            # effectively 2 * conv(x) instead of conv(x) + proj(input),
            # discarding the residual connection whenever channels changed.
            residual = nn.Conv(
                self.filters, kernel_size=(1, 1), use_bias=False)(residual)
        return x + residual
|
| 118 |
+
|
| 119 |
+
class Fourier(nn.Module):
    """Random Fourier feature embedding.

    Projects the input with a fixed random Gaussian matrix and returns the
    concatenated cos/sin features, so the last dimension becomes
    2 * `features`.

    NOTE(review): the original body was dead code — it referenced undefined
    names (`means`, `math`, `input`, `torch`) and would raise NameError if
    ever instantiated. Rewritten as a working JAX/Flax module with the same
    external interface (`__call__(self, f)`).
    """
    features: int = 256  # number of random projections (output is 2x this)

    @nn.compact
    def __call__(self, f):
        # Fixed (non-trained) random projection, drawn once at init time.
        weight = self.param(
            "weight",
            jax.nn.initializers.normal(stddev=1.0),
            (self.features, f.shape[-1]))
        proj = 2 * jnp.pi * f @ jax.lax.stop_gradient(weight).T
        return jnp.concatenate([jnp.cos(proj), jnp.sin(proj)], axis=-1)
|
| 131 |
+
|
| 132 |
+
from einops import rearrange
|
| 133 |
+
class LinearEncoder(nn.Module):
|
| 134 |
+
|
| 135 |
+
config: ml_collections.ConfigDict
|
| 136 |
+
|
| 137 |
+
#So in this setup, we don't carea bout anything
|
| 138 |
+
@nn.compact
|
| 139 |
+
def __call__(self, x):
|
| 140 |
+
print("init encoder")
|
| 141 |
+
print("x shape", x.shape)
|
| 142 |
+
x = rearrange(x, '... (h b1) (w b2) c -> ... h w (c b1 b2)', b1=8, b2=8)
|
| 143 |
+
x = nn.Dense(4)(x)#We just put to 4 for now
|
| 144 |
+
print(x.shape)
|
| 145 |
+
return x
|
| 146 |
+
#k = nn.Dense(self.hidden_size, **self.tc.default_config())(x_modulated)
|
| 147 |
+
#1x1 conv, uplift from 3 to like..... 64
|
| 148 |
+
#That gives us 256x256x64
|
| 149 |
+
#Then pixelshuffle to
|
| 150 |
+
|
| 151 |
+
|
| 152 |
+
class Encoder(nn.Module):
|
| 153 |
+
"""From [H,W,D] image to [H',W',D'] embedding. Using Conv layers."""
|
| 154 |
+
config: ml_collections.ConfigDict
|
| 155 |
+
|
| 156 |
+
def setup(self):
|
| 157 |
+
self.filters = self.config.filters#filters is the original setup
|
| 158 |
+
self.num_res_blocks = self.config.num_res_blocks
|
| 159 |
+
self.channel_multipliers = self.config.channel_multipliers
|
| 160 |
+
self.embedding_dim = self.config.embedding_dim
|
| 161 |
+
self.norm_type = self.config.norm_type
|
| 162 |
+
self.activation_fn = nn.swish
|
| 163 |
+
self.kernel_init = initializers.he_normal()
|
| 164 |
+
|
| 165 |
+
@nn.compact
|
| 166 |
+
def __call__(self, x):
|
| 167 |
+
print("Initializing encoder.")
|
| 168 |
+
norm_fn = get_norm_layer(norm_type=self.norm_type)
|
| 169 |
+
block_args = dict(norm_fn=norm_fn, activation_fn=self.activation_fn)
|
| 170 |
+
print("Incoming encoder shape", x.shape)
|
| 171 |
+
x = nn.Conv(self.filters, kernel_size=(3, 3), use_bias=False)(x)
|
| 172 |
+
print('Encoder layer', x.shape)
|
| 173 |
+
num_blocks = len(self.channel_multipliers)
|
| 174 |
+
|
| 175 |
+
#The way SD works, is it does 2x resnet, not changing anything, then downsample
|
| 176 |
+
#It does this 3 times, leading to 8x downsample
|
| 177 |
+
#Then it has an extra resnet block, and THEN from 512 to 8 / 4
|
| 178 |
+
|
| 179 |
+
for i in range(num_blocks):
|
| 180 |
+
filters = self.filters * self.channel_multipliers[i]
|
| 181 |
+
for _ in range(self.num_res_blocks):
|
| 182 |
+
x = ResBlock(filters, **block_args)(x)
|
| 183 |
+
if i < num_blocks - 1:#For each block *except end* do downsample
|
| 184 |
+
print("doing downsample")
|
| 185 |
+
x = dsample(x)
|
| 186 |
+
print('Encoder layer', x.shape)
|
| 187 |
+
|
| 188 |
+
#After we are done downsampling, we do the 2 resnet, and down below here, we have the 2 midblock?
|
| 189 |
+
|
| 190 |
+
for _ in range(self.num_res_blocks):
|
| 191 |
+
x = ResBlock(filters, **block_args)(x)
|
| 192 |
+
print('Encoder layer final', x.shape)
|
| 193 |
+
|
| 194 |
+
x = norm_fn()(x)
|
| 195 |
+
x = self.activation_fn(x)
|
| 196 |
+
last_dim = self.embedding_dim*2 if self.config['quantizer_type'] == 'kl' else self.embedding_dim
|
| 197 |
+
x = nn.Conv(last_dim, kernel_size=(1, 1))(x)
|
| 198 |
+
print("Final embeddings are size", x.shape)
|
| 199 |
+
return x
|
| 200 |
+
|
| 201 |
+
class Decoder(nn.Module):
|
| 202 |
+
"""From [H',W',D'] embedding to [H,W,D] embedding. Using Conv layers."""
|
| 203 |
+
|
| 204 |
+
config: ml_collections.ConfigDict
|
| 205 |
+
|
| 206 |
+
def setup(self):
|
| 207 |
+
self.filters = self.config.filters
|
| 208 |
+
self.num_res_blocks = self.config.num_res_blocks
|
| 209 |
+
self.channel_multipliers = self.config.channel_multipliers
|
| 210 |
+
self.norm_type = self.config.norm_type
|
| 211 |
+
self.image_channels = self.config.image_channels
|
| 212 |
+
self.activation_fn = nn.swish
|
| 213 |
+
self.kernel_init = initializers.he_normal()
|
| 214 |
+
|
| 215 |
+
@nn.compact
|
| 216 |
+
def __call__(self, x):
|
| 217 |
+
norm_fn = get_norm_layer(norm_type=self.norm_type)
|
| 218 |
+
block_args = dict(norm_fn=norm_fn, activation_fn=self.activation_fn,)
|
| 219 |
+
num_blocks = len(self.channel_multipliers)
|
| 220 |
+
filters = self.filters * self.channel_multipliers[-1]
|
| 221 |
+
print("Decoder incoming shape", x.shape)
|
| 222 |
+
|
| 223 |
+
#We don't need to do anything here because it'll put it back to 512
|
| 224 |
+
|
| 225 |
+
x = nn.Conv(filters, kernel_size=(3, 3), use_bias=True)(x)
|
| 226 |
+
print("Decoder input", x.shape)
|
| 227 |
+
|
| 228 |
+
|
| 229 |
+
#This is the mid block
|
| 230 |
+
for _ in range(self.num_res_blocks):
|
| 231 |
+
x = ResBlock(filters, **block_args)(x)
|
| 232 |
+
print('Mid Block Decoder layer', x.shape)
|
| 233 |
+
|
| 234 |
+
#First two SET of blocks is just 3 resnet, no channel changes, we are already at 4x = 512
|
| 235 |
+
|
| 236 |
+
for i in reversed(range(num_blocks)):
|
| 237 |
+
filters = self.filters * self.channel_multipliers[i]
|
| 238 |
+
for _ in range(self.num_res_blocks):#sym
|
| 239 |
+
x = ResBlock(filters, **block_args)(x)
|
| 240 |
+
if i > 0:
|
| 241 |
+
x = upsample(x, 2)
|
| 242 |
+
x = nn.Conv(filters, kernel_size=(3, 3))(x)
|
| 243 |
+
print('Decoder layer', x.shape)
|
| 244 |
+
x = norm_fn()(x)
|
| 245 |
+
x = self.activation_fn(x)
|
| 246 |
+
x = nn.Conv(self.image_channels, kernel_size=(3, 3))(x)
|
| 247 |
+
return x
|
| 248 |
+
|
| 249 |
+
class VectorQuantizer(nn.Module):
|
| 250 |
+
"""Basic vector quantizer."""
|
| 251 |
+
config: ml_collections.ConfigDict
|
| 252 |
+
train: bool
|
| 253 |
+
|
| 254 |
+
@nn.compact
|
| 255 |
+
def __call__(self, x):
|
| 256 |
+
codebook_size = self.config.codebook_size
|
| 257 |
+
emb_dim = x.shape[-1]
|
| 258 |
+
codebook = self.param(
|
| 259 |
+
"codebook",
|
| 260 |
+
jax.nn.initializers.variance_scaling(scale=1.0, mode="fan_in", distribution="uniform"),
|
| 261 |
+
(codebook_size, emb_dim))
|
| 262 |
+
codebook = jnp.asarray(codebook) # (codebook_size, emb_dim)
|
| 263 |
+
distances = jnp.reshape(
|
| 264 |
+
squared_euclidean_distance(jnp.reshape(x, (-1, emb_dim)), codebook),
|
| 265 |
+
x.shape[:-1] + (codebook_size,)) # [x, codebook_size] similarity matrix.
|
| 266 |
+
encoding_indices = jnp.argmin(distances, axis=-1)
|
| 267 |
+
encoding_onehot = jax.nn.one_hot(encoding_indices, codebook_size)
|
| 268 |
+
quantized = self.quantize(encoding_onehot)
|
| 269 |
+
result_dict = dict()
|
| 270 |
+
if self.train:
|
| 271 |
+
e_latent_loss = jnp.mean((sg(quantized) - x)**2) * self.config.commitment_cost
|
| 272 |
+
q_latent_loss = jnp.mean((quantized - sg(x))**2)
|
| 273 |
+
entropy_loss = 0.0
|
| 274 |
+
if self.config.entropy_loss_ratio != 0:
|
| 275 |
+
entropy_loss = entropy_loss_fn(
|
| 276 |
+
-distances,
|
| 277 |
+
loss_type=self.config.entropy_loss_type,
|
| 278 |
+
temperature=self.config.entropy_temperature
|
| 279 |
+
) * self.config.entropy_loss_ratio
|
| 280 |
+
e_latent_loss = jnp.asarray(e_latent_loss, jnp.float32)
|
| 281 |
+
q_latent_loss = jnp.asarray(q_latent_loss, jnp.float32)
|
| 282 |
+
entropy_loss = jnp.asarray(entropy_loss, jnp.float32)
|
| 283 |
+
loss = e_latent_loss + q_latent_loss + entropy_loss
|
| 284 |
+
result_dict = dict(
|
| 285 |
+
quantizer_loss=loss,
|
| 286 |
+
e_latent_loss=e_latent_loss,
|
| 287 |
+
q_latent_loss=q_latent_loss,
|
| 288 |
+
entropy_loss=entropy_loss)
|
| 289 |
+
quantized = x + jax.lax.stop_gradient(quantized - x)
|
| 290 |
+
|
| 291 |
+
result_dict.update({
|
| 292 |
+
"z_ids": encoding_indices,
|
| 293 |
+
})
|
| 294 |
+
return quantized, result_dict
|
| 295 |
+
|
| 296 |
+
def quantize(self, encoding_onehot: jnp.ndarray) -> jnp.ndarray:
|
| 297 |
+
codebook = jnp.asarray(self.variables["params"]["codebook"])
|
| 298 |
+
return jnp.dot(encoding_onehot, codebook)
|
| 299 |
+
|
| 300 |
+
def decode_ids(self, ids: jnp.ndarray) -> jnp.ndarray:
|
| 301 |
+
codebook = self.variables["params"]["codebook"]
|
| 302 |
+
return jnp.take(codebook, ids, axis=0)
|
| 303 |
+
|
| 304 |
+
class KLQuantizer(nn.Module):
|
| 305 |
+
config: ml_collections.ConfigDict
|
| 306 |
+
train: bool
|
| 307 |
+
|
| 308 |
+
@nn.compact
|
| 309 |
+
def __call__(self, x):
|
| 310 |
+
emb_dim = x.shape[-1] // 2 # Use half as means, half as logvars.
|
| 311 |
+
means = x[..., :emb_dim]
|
| 312 |
+
logvars = x[..., emb_dim:]
|
| 313 |
+
if not self.train:
|
| 314 |
+
result_dict = dict()
|
| 315 |
+
result_dict["std"] = jnp.exp(0.5 * logvars)
|
| 316 |
+
return means, result_dict
|
| 317 |
+
else:
|
| 318 |
+
noise = jax.random.normal(self.make_rng("noise"), means.shape)
|
| 319 |
+
stds = jnp.exp(0.5 * logvars)
|
| 320 |
+
z = means + stds * noise
|
| 321 |
+
#kl_loss = -0.5 * jnp.mean(1 + logvars - means**2 - jnp.exp(logvars))
|
| 322 |
+
|
| 323 |
+
#New kl
|
| 324 |
+
kl_loss = - 0.5 * jnp.sum(1 + logvars - jnp.square(means) - jnp.exp(logvars),axis=tuple(range(1, means.ndim)))
|
| 325 |
+
kl_loss = jnp.mean(kl_loss)
|
| 326 |
+
|
| 327 |
+
result_dict = dict(quantizer_loss=kl_loss)
|
| 328 |
+
result_dict["std"] = jnp.exp(0.5 * logvars)
|
| 329 |
+
return z, result_dict
|
| 330 |
+
|
| 331 |
+
class AEQuantizer(nn.Module): #cooking
|
| 332 |
+
config: ml_collections.ConfigDict
|
| 333 |
+
train: bool
|
| 334 |
+
|
| 335 |
+
@nn.compact
|
| 336 |
+
def __call__(self, x):
|
| 337 |
+
result_dict = dict()
|
| 338 |
+
result_dict["std"] = 0.0
|
| 339 |
+
return x, result_dict
|
| 340 |
+
|
| 341 |
+
import jax
|
| 342 |
+
import jax.numpy as jnp
|
| 343 |
+
from jax import random
|
| 344 |
+
|
| 345 |
+
def imq_kernel(X: jnp.ndarray, Y: jnp.ndarray, h_dim: int):
|
| 346 |
+
batch_size = X.shape[0]
|
| 347 |
+
|
| 348 |
+
norms_x = jnp.sum(X**2, axis=1, keepdims=True) # batch_size x 1
|
| 349 |
+
prods_x = jnp.dot(X, X.T) # batch_size x batch_size
|
| 350 |
+
dists_x = norms_x + norms_x.T - 2 * prods_x
|
| 351 |
+
|
| 352 |
+
norms_y = jnp.sum(Y**2, axis=1, keepdims=True) # batch_size x 1
|
| 353 |
+
prods_y = jnp.dot(Y, Y.T) # batch_size x batch_size
|
| 354 |
+
dists_y = norms_y + norms_y.T - 2 * prods_y
|
| 355 |
+
|
| 356 |
+
dot_prd = jnp.dot(X, Y.T)
|
| 357 |
+
dists_c = norms_x + norms_y.T - 2 * dot_prd
|
| 358 |
+
|
| 359 |
+
stats = 0
|
| 360 |
+
for scale in [0.1, 0.2, 0.5, 1.0, 2.0, 5.0, 10.0]:
|
| 361 |
+
C = 2 * h_dim * 1.0 * scale
|
| 362 |
+
res1 = C / (C + dists_x)
|
| 363 |
+
res1 += C / (C + dists_y)
|
| 364 |
+
|
| 365 |
+
res1 = (1 - jnp.eye(batch_size)) * res1
|
| 366 |
+
res1 = jnp.sum(res1) / (batch_size - 1)
|
| 367 |
+
|
| 368 |
+
res2 = C / (C + dists_c)
|
| 369 |
+
res2 = jnp.sum(res2) * 2.0 / batch_size
|
| 370 |
+
stats += res1 - res2
|
| 371 |
+
|
| 372 |
+
return stats
|
| 373 |
+
|
| 374 |
+
class MMDQuantizer(nn.Module): #cooking
    """WAE-style bottleneck: no quantization, but the flattened latents are
    pulled toward a scaled Gaussian prior with an IMQ-kernel MMD penalty."""
    config: ml_collections.ConfigDict
    train: bool

    @nn.compact
    def __call__(self, x):
        if not self.train:
            # Eval: pure pass-through, no loss term.
            result_dict = dict()
            return x, result_dict
        else:
            print("mmd quantizer")
            batch_size, height, width, latent_channels = x.shape
            # One flat vector per example for the kernel computation.
            z_flat = x.reshape(batch_size, -1)
            print(z_flat.shape)
            # Prior samples ~ N(0, I), scaled by MMD_weight.
            # NOTE(review): the scale multiplies the *prior* samples, so
            # MMD_weight sets the target latent std rather than weighting
            # the loss — confirm this is intended.
            z_fake_flat = jax.random.normal(self.make_rng("noise"), z_flat.shape) * self.config["MMD_weight"]
            print(z_fake_flat.shape)
            mmd_loss = imq_kernel(z_flat, z_fake_flat, z_flat.shape[1])
            print(mmd_loss.shape)
            print(mmd_loss)
            result_dict = dict(quantizer_loss=mmd_loss)
            return x, result_dict
| 395 |
+
|
| 396 |
+
|
| 397 |
+
|
| 398 |
+
class KLQuantizerTwo(nn.Module):
    """Experimental 'denoising' KL bottleneck.

    During training it adds unit-variance Gaussian noise to the raw latents
    and applies a KL-style penalty with the variance term manually pinned
    (logvars forced to 0), which reduces to an L2 pull on the latents.
    Eval mode is a pure pass-through. Several disabled experimental
    branches are kept for reference.
    """
    config: ml_collections.ConfigDict
    train: bool

    @nn.compact
    def __call__(self, x):
        #emb_dim = x.shape[-1] // 2 # Use half as means, half as logvars.
        #means = x[..., :emb_dim]
        #logvars = x[..., emb_dim:]

        #Wwe actually wanna do mean and STD on the batch axis?


        #we start as b hw 8, go to b hw 4, with mean and std over those.

        if not self.train:
            result_dict = dict()
            result_dict["std"] = 1.0
            return x, result_dict
        else:
            # Per-example std over all non-batch axes.
            # NOTE(review): computed but immediately overridden below
            # (logvars is reset to 0.0); only relevant if the manual
            # sigma=1 switch is turned off.
            stds = jnp.std(x, axis = [1,2,3])

            noise = jax.random.normal(self.make_rng("noise"), x.shape)

            logvars = .5 * jnp.log(stds)
            logvars = logvars.reshape(-1,1,1,1)
            if True:#This is true for special KL where we set sigma to 1 manually
                logvars = 0.0


            if False:#dinossl
                x_2 = x.reshape(x.shape[0], -1, x.shape[-1])#Linear with channel size
                x_2 = jnp.swapaxes(x_2,0,1)
                #then/ get the covariance
                cov = jnp.swapaxes(x_2,1,2) @ x_2 / x.shape[0]
                #Not sure about this, we also have regular cov
                I_d = jnp.identity(x.shape[-1])
                R_eps = jnp.log(jnp.linalg.det(jnp.expand_dims(I_d, axis = 0) + x.shape[-1]/ (.0001 ** 2) * cov))

                #So something here *does* depend on the -1 shape, but I need to math it out.
                kl_loss = R_eps.mean()


            #This is the denoising version
            # With logvars == 0 this is 0.5 * sum(x^2) per example,
            # averaged over the batch.
            kl_loss = - 0.5 * jnp.sum(1 + logvars - jnp.square(x) - jnp.exp(logvars),axis=tuple(range(1, x.ndim)))
            kl_loss = jnp.mean(kl_loss)

            result_dict = dict(quantizer_loss=kl_loss)
            result_dict["std"] = 1.0

            #For proper kl two, we need to return noise + mean.
            return x + noise, result_dict
| 450 |
+
|
| 451 |
+
|
| 452 |
+
class FSQuantizer(nn.Module):
    """Finite Scalar Quantization bottleneck.

    Squashes each channel with tanh, snaps it to one of ``fsq_levels``
    evenly spaced values via a straight-through estimator, and reports
    per-code usage counts for codebook diagnostics.
    """
    config: ml_collections.ConfigDict
    train: bool

    @nn.compact
    def __call__(self, x):
        levels = self.config['fsq_levels']
        assert levels % 2 == 1, "FSQ levels must be odd."
        half = levels // 2

        # Bound to [-1, 1], stretch to [-half, half], round to integers.
        scaled = jnp.tanh(x) * (levels - 1) / 2
        snapped = jnp.round(scaled)

        # Straight-through: forward uses the rounded value, the gradient
        # flows through the unrounded one. Renormalize back to [-1, 1].
        quantized = scaled + jax.lax.stop_gradient(snapped - scaled)
        quantized = quantized / half

        # Diagnostics: treat the per-channel integer codes as digits of a
        # base-`levels` number and histogram the resulting flat indices.
        channels = x.shape[-1]
        shifted = snapped + half
        basis = jnp.concatenate(
            (jnp.array([1]), jnp.cumprod(jnp.array([levels] * (channels - 1))))
        ).astype(jnp.uint32)
        flat_codes = (shifted * basis).sum(axis=-1).astype(jnp.uint32).reshape(-1)
        usage = jnp.bincount(flat_codes, length=levels ** channels)

        return quantized, {"z_ids": snapped, 'usage': usage}
| 478 |
+
|
| 479 |
+
class VQVAE(nn.Module):
    """VQVAE model: encoder -> configurable bottleneck -> decoder."""
    config: ml_collections.ConfigDict
    train: bool

    def setup(self):
        """Instantiate the bottleneck chosen by ``quantizer_type`` plus the
        encoder/decoder pair."""
        bottlenecks = {
            'vq': VectorQuantizer,
            'kl': KLQuantizer,
            'fsq': FSQuantizer,
            'ae': AEQuantizer,
            'kl_two': KLQuantizerTwo,
        }
        chosen = bottlenecks.get(self.config['quantizer_type'])
        # An unknown type leaves self.quantizer unset, matching the
        # original if/elif chain's behavior.
        if chosen is not None:
            self.quantizer = chosen(config=self.config, train=self.train)
        self.encoder = Encoder(config=self.config)
        self.decoder = Decoder(config=self.config)

    def encode(self, image):
        """Encode an image batch into (possibly quantized) latents."""
        features = self.encoder(image)
        quantized, result_dict = self.quantizer(features)
        print("After quant", quantized.shape)
        return quantized, result_dict

    def decode(self, z_vectors):
        """Decode latent vectors back into image space."""
        print("z_vectors shape", z_vectors.shape)
        return self.decoder(z_vectors)

    def decode_from_indices(self, z_ids):
        """Decode discrete code indices (VQ bottleneck only) into an image."""
        return self.decode(self.quantizer.decode_ids(z_ids))

    def encode_to_indices(self, image):
        """Encode an image into discrete code indices (VQ bottleneck only)."""
        _, result_dict = self.quantizer(self.encoder(image))
        return result_dict["z_ids"]

    def __call__(self, input_dict):
        """Full autoencode pass: returns (reconstruction, stats dict)."""
        quantized, result_dict = self.encode(input_dict)
        #Freezing encoder now
        print("encode finished")
        result_dict["latents"] = quantized
        return self.decoder(quantized), result_dict
f16c16/ppl_images.py
ADDED
|
@@ -0,0 +1,255 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
try: # For debugging
    from localutils.debugger import enable_debug
    enable_debug()
except ImportError:
    pass


#import jax
#jax.config.update('jax_platform_name', 'cpu')
import os
# os.environ["JAX_PLATFORMS"] = 'cpu'
import jax
import lpips

# LPIPS perceptual metric (PyTorch); AlexNet backbone gives the best
# forward scores per the LPIPS authors. Lives on the CUDA GPU while JAX
# handles the model.
loss_fn_alex = lpips.LPIPS(net='alex') # best forward scores
loss_fn_alex = loss_fn_alex.cuda()


import numpy as np
import flax.linen as nn
import jax.numpy as jnp
from absl import app, flags
from functools import partial
import numpy as np
import tqdm
import flax
import optax
import wandb
from ml_collections import config_flags
#import elements
import ml_collections
import tensorflow_datasets as tfds
import tensorflow as tf
# Keep TensorFlow off the accelerators: it only drives the input pipeline;
# JAX owns the GPU/TPU memory.
tf.config.set_visible_devices([], "GPU")
tf.config.set_visible_devices([], "TPU")
import matplotlib.pyplot as plt
from typing import Any

from utils.train_state import TrainState, target_update
from utils.checkpoint import Checkpoint
from utils.fid import get_fid_network, fid_from_stats

from train import VQGANModel
from models.vqvae import VQVAE
from models.discriminator import Discriminator

from PIL import Image
import torch

# train.py already registered these flags on import; drop them so this
# script can re-define its own values below.
delattr(flags.FLAGS, 'dataset_name')
delattr(flags.FLAGS, 'load_dir')
delattr(flags.FLAGS, 'batch_size')

FLAGS = flags.FLAGS
flags.DEFINE_string('dataset_name', 'imagenet256', 'Environment name.')
flags.DEFINE_string('load_dir', "/home/dkaplan/Downloads/Models/checkpoint(1).tmp", 'Load dir (if not None, load params from here).')


flags.DEFINE_integer('batch_size', 2, 'Total Batch size.')
# Flags are inhereited from train.py, so pass your model parameters again here to evaluate.

import gc
+
def main(_):
    """Estimate a PPL-style perceptual score for the autoencoder, in image space.

    Streams ImageNet validation batches, runs the model's
    ``reconstruction_ppl_image`` pass (reconstruction plus a perturbed
    decode), scores the two decodes against each other with LPIPS
    (AlexNet, on the PyTorch GPU), scales by 1/eps^2 and averages over up
    to 500 batches. Also logs latent mean/std statistics along the way.
    """
    device_count = len(jax.local_devices())
    global_device_count = jax.device_count()
    # Per-process batch size (global batch split across hosts).
    local_batch_size = FLAGS.batch_size // (global_device_count // device_count)

    def get_dataset(is_train):
        # Iterator of square-cropped, resized, [0, 1]-scaled ImageNet images.
        if 'imagenet' in FLAGS.dataset_name:
            def deserialization_fn(data):
                image = data['image']
                min_side = tf.minimum(tf.shape(image)[0], tf.shape(image)[1])
                image = tf.image.resize_with_crop_or_pad(image, min_side, min_side)
                if 'imagenet256' in FLAGS.dataset_name:
                    image = tf.image.resize(image, (256, 256))
                elif 'imagenet128' in FLAGS.dataset_name:
                    image = tf.image.resize(image, (128, 128))
                else:
                    raise ValueError(f"Unknown dataset {FLAGS.dataset_name}")
                if is_train:
                    image = tf.image.random_flip_left_right(image)
                image = tf.cast(image, tf.float32) / 255.0
                return image

            split = tfds.split_for_jax_process('train' if is_train else 'validation', drop_remainder=True)
            dataset = tfds.load('imagenet2012', data_dir="/data/inet", split=split)
            dataset = dataset.map(deserialization_fn, num_parallel_calls=tf.data.AUTOTUNE)
            dataset = dataset.shuffle(10000, seed=42, reshuffle_each_iteration=True)
            dataset = dataset.batch(local_batch_size)
            dataset = dataset.prefetch(tf.data.AUTOTUNE)
            dataset = tfds.as_numpy(dataset)
            dataset = iter(dataset)
            return dataset
        else:
            raise ValueError(f"Unknown dataset {FLAGS.dataset_name}")

    dataset = get_dataset(is_train=True)
    dataset_valid = get_dataset(is_train=False)


    # image = Image.open("osman.png")
    # image = np.array(image) / 255.0
    # print(image)
    # image = jnp.array(image)
    # image = jnp.expand_dims(image, 0)
    # image = jnp.expand_dims(image, 0)

    # One example batch, used only to infer image shape for model init.
    example_obs = next(dataset)[:1]

    #Reconstruction loop
    # image = model.reconstruction(image)
    # image = image[0,0,:,:,:]
    # image = (image * 255).astype(np.uint8)
    # image = np.array(image)
    # img = Image.fromarray(image)
    # img.save("osman" + str(i) + ".png")


    rng = jax.random.PRNGKey(FLAGS.seed)
    rng, param_key = jax.random.split(rng)
    print("Total devices", jax.local_devices()[0])


    ###################################
    # Creating Model and put on devices.
    ###################################
    FLAGS.model.image_channels = example_obs.shape[-1]
    FLAGS.model.image_size = example_obs.shape[1]
    vqvae_def = VQVAE(FLAGS.model, train=True)
    vqvae_params = vqvae_def.init({'params': param_key, 'noise': param_key}, example_obs)['params']
    # tx = optax.adam(learning_rate=FLAGS.model['lr'], b1=FLAGS.model['beta1'], b2=FLAGS.model['beta2'])
    vqvae_ts = TrainState.create(vqvae_def, vqvae_params)#, tx=tx) #Turning off tx because we don't need it...
    # Eval-mode copy (train=False) sharing the same parameters.
    vqvae_def_eps = VQVAE(FLAGS.model, train=False)
    vqvae_eps_ts = TrainState.create(vqvae_def_eps, vqvae_params)
    print("Total num of VQVAE parameters:", sum(x.size for x in jax.tree_util.tree_leaves(vqvae_params)))

    discriminator_def = Discriminator(FLAGS.model)
    discriminator_params = discriminator_def.init(param_key, example_obs)['params']
    # tx = optax.adam(learning_rate=FLAGS.model['lr'], b1=FLAGS.model['beta1'], b2=FLAGS.model['beta2'])
    discriminator_ts = TrainState.create(discriminator_def, discriminator_params)#, tx=tx)#No tx again
    print("Total num of Discriminator parameters:", sum(x.size for x in jax.tree_util.tree_leaves(discriminator_params)))

    model = VQGANModel(rng=rng, vqvae=vqvae_ts, vqvae_eps=vqvae_eps_ts, discriminator=discriminator_ts, config=FLAGS.model)

    assert FLAGS.load_dir is not None
    cp = Checkpoint(FLAGS.load_dir)
    model = cp.load_model(model)
    print("Loaded model with step", model.vqvae.step)

    # Replicate across local devices for the model's pmapped methods.
    model = flax.jax_utils.replicate(model, devices=jax.local_devices())
    jax.debug.visualize_array_sharding(model.vqvae.params['decoder']['Conv_0']['bias'])
    #print(model.vqvae)


    ####################################
    # Noise stuff
    ###################################

    cpus = jax.devices("cpu")

    #So there are a few ways to calculate PPL here
    #We could take two images in image space
    #Walk between them and check the LPIPS in the output space
    #...actually that's basically it right?
    #We could also do the walk in latent space, which is the same, but with ?? scaling

    #Let's see if they are any different.
    i = 0
    lpips_list = []
    means = []
    stds = []
    for valid_images in dataset_valid:


        valid_images = valid_images.reshape((len(jax.local_devices()), -1, *valid_images.shape[1:])) # [devices, batch//devices, etc..]
        #1, 2, 256, 256, 3
        #Given our 2 images, we want to lerp between them...
        #We want to lerp once to point t, and once to point t + eps
        #And then we want to get the LPIPS between those two images
        #And then we calculate LPIPS
        #And then we divide by eps squared, and done.



        # NOTE(review): reconstruction_ppl_image lives in train.py;
        # presumably `decoded` is the eps-perturbed decode — confirm there.
        reconstructed_images, decoded, std, latents, std_noisy, latents_noisy = model.reconstruction_ppl_image(valid_images) # [devices, 8, 256, 256, 3]


        means.append(latents.mean())
        stds.append(latents.std())

        # print("std", std.mean())
        print("latent mean", latents.mean())
        print("actual latent std", latents.std())

        print("latent mean noisy", latents_noisy.mean())
        print("actual latent std noisy", latents_noisy.std())

        #Need to change images back to -1,1

        reconstructed_images = reconstructed_images * 2 - 1
        decoded = decoded * 2 -1

        #1,2,256,256,3
        # Move channels to the front (NCHW) for the PyTorch LPIPS network.
        reconstructed_images = jnp.swapaxes(reconstructed_images, 0, 4)
        decoded = jnp.swapaxes(decoded, 0, 4)

        reconstructed_images = jnp.swapaxes(reconstructed_images, 0, 1)
        decoded = jnp.swapaxes(decoded, 0, 1)

        reconstructed_images = jnp.squeeze(reconstructed_images)
        decoded = jnp.squeeze(decoded)

        #So here, we want to put them on CPU and delete the original


        image_np = np.asarray(reconstructed_images)
        image_np_2 = torch.from_numpy(np.copy(image_np)).cuda()

        decoded_np = np.asarray(decoded)
        decoded_np_2 = torch.from_numpy(np.copy(decoded_np)).cuda()



        lpips_loss = loss_fn_alex(image_np_2, decoded_np_2)
        lpips_cpu = lpips_loss.detach().cpu().squeeze().mean()
        # PPL normalization: divide by eps^2 (eps = 1e-4).
        lpips_cpu = lpips_cpu / (.0001 ** 2)

        print(lpips_cpu)
        lpips_list.append(lpips_cpu)


        i += 1
        #
        if i == 500:
            break

    #1e-4 is 54...
    #1e-5 is 106
    #1e-6 is 126

    #kl2 is 150?



    mean_lpips = jnp.mean(jnp.asarray(lpips_list))
    print(mean_lpips)
    print("mean of means", jnp.asarray(means).mean())
    print("stds of means", jnp.asarray(means).std())
    print("mean of stds", jnp.asarray(stds).mean())
    print("std of stds", jnp.asarray(stds).std())
| 254 |
+
# Script entry point: absl parses flags, then runs main.
if __name__ == '__main__':
    app.run(main)
|
f16c16/ppl_latents.py
ADDED
|
@@ -0,0 +1,307 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
try: # For debugging
    from localutils.debugger import enable_debug
    enable_debug()
except ImportError:
    pass


#import jax
#jax.config.update('jax_platform_name', 'cpu')
import os
# os.environ["JAX_PLATFORMS"] = 'cpu'
import jax
import lpips

# LPIPS perceptual metric (PyTorch); AlexNet backbone gives the best
# forward scores per the LPIPS authors. Lives on the CUDA GPU while JAX
# handles the model.
loss_fn_alex = lpips.LPIPS(net='alex') # best forward scores
loss_fn_alex = loss_fn_alex.cuda()


import numpy as np
import flax.linen as nn
import jax.numpy as jnp
from absl import app, flags
from functools import partial
import numpy as np
import tqdm
import flax
import optax
import wandb
from ml_collections import config_flags
#import elements
import ml_collections
import tensorflow_datasets as tfds
import tensorflow as tf
# Keep TensorFlow off the accelerators: it only drives the input pipeline;
# JAX owns the GPU/TPU memory.
tf.config.set_visible_devices([], "GPU")
tf.config.set_visible_devices([], "TPU")
import matplotlib.pyplot as plt
from typing import Any

from utils.train_state import TrainState, target_update
from utils.checkpoint import Checkpoint
from utils.fid import get_fid_network, fid_from_stats

from train import VQGANModel
from models.vqvae import VQVAE
from models.discriminator import Discriminator

from PIL import Image
import torch

# train.py already registered these flags on import; drop them so this
# script can re-define its own values below.
delattr(flags.FLAGS, 'dataset_name')
delattr(flags.FLAGS, 'load_dir')
delattr(flags.FLAGS, 'batch_size')

FLAGS = flags.FLAGS
flags.DEFINE_string('dataset_name', 'imagenet256', 'Environment name.')
flags.DEFINE_string('load_dir', "/home/dkaplan/Downloads/Models/checkpoint(1).tmp", 'Load dir (if not None, load params from here).')


flags.DEFINE_integer('batch_size', 2, 'Total Batch size.')
# Flags are inhereited from train.py, so pass your model parameters again here to evaluate.

import gc
+
def main(_):
    """Estimate a PPL-style perceptual score for the autoencoder, in latent space.

    Streams ImageNet validation batches, runs the model's
    ``reconstruction_ppl`` pass (reconstruction plus a latent-perturbed
    decode), scores the two decodes against each other with LPIPS
    (AlexNet, on the PyTorch GPU), scales by 1/eps^2 and averages over up
    to 500 batches. Logs latent mean/std statistics; past results for
    several checkpoints are recorded in the trailing comments.
    """
    device_count = len(jax.local_devices())
    global_device_count = jax.device_count()
    # Per-process batch size (global batch split across hosts).
    local_batch_size = FLAGS.batch_size // (global_device_count // device_count)

    def get_dataset(is_train):
        # Iterator of square-cropped, resized, [0, 1]-scaled ImageNet images.
        if 'imagenet' in FLAGS.dataset_name:
            def deserialization_fn(data):
                image = data['image']
                min_side = tf.minimum(tf.shape(image)[0], tf.shape(image)[1])
                image = tf.image.resize_with_crop_or_pad(image, min_side, min_side)
                if 'imagenet256' in FLAGS.dataset_name:
                    image = tf.image.resize(image, (256, 256))
                elif 'imagenet128' in FLAGS.dataset_name:
                    image = tf.image.resize(image, (128, 128))
                else:
                    raise ValueError(f"Unknown dataset {FLAGS.dataset_name}")
                if is_train:
                    image = tf.image.random_flip_left_right(image)
                image = tf.cast(image, tf.float32) / 255.0
                return image

            split = tfds.split_for_jax_process('train' if is_train else 'validation', drop_remainder=True)
            dataset = tfds.load('imagenet2012', data_dir="/data/inet", split=split)
            dataset = dataset.map(deserialization_fn, num_parallel_calls=tf.data.AUTOTUNE)
            dataset = dataset.shuffle(10000, seed=42, reshuffle_each_iteration=True)
            dataset = dataset.batch(local_batch_size)
            dataset = dataset.prefetch(tf.data.AUTOTUNE)
            dataset = tfds.as_numpy(dataset)
            dataset = iter(dataset)
            return dataset
        else:
            raise ValueError(f"Unknown dataset {FLAGS.dataset_name}")

    dataset = get_dataset(is_train=True)
    dataset_valid = get_dataset(is_train=False)


    # image = Image.open("osman.png")
    # image = np.array(image) / 255.0
    # print(image)
    # image = jnp.array(image)
    # image = jnp.expand_dims(image, 0)
    # image = jnp.expand_dims(image, 0)

    # One example batch, used only to infer image shape for model init.
    example_obs = next(dataset)[:1]

    #Reconstruction loop
    # image = model.reconstruction(image)
    # image = image[0,0,:,:,:]
    # image = (image * 255).astype(np.uint8)
    # image = np.array(image)
    # img = Image.fromarray(image)
    # img.save("osman" + str(i) + ".png")


    rng = jax.random.PRNGKey(FLAGS.seed)
    rng, param_key = jax.random.split(rng)
    print("Total devices", jax.local_devices()[0])


    ###################################
    # Creating Model and put on devices.
    ###################################
    FLAGS.model.image_channels = example_obs.shape[-1]
    FLAGS.model.image_size = example_obs.shape[1]
    vqvae_def = VQVAE(FLAGS.model, train=True)
    vqvae_params = vqvae_def.init({'params': param_key, 'noise': param_key}, example_obs)['params']
    # tx = optax.adam(learning_rate=FLAGS.model['lr'], b1=FLAGS.model['beta1'], b2=FLAGS.model['beta2'])
    vqvae_ts = TrainState.create(vqvae_def, vqvae_params)#, tx=tx) #Turning off tx because we don't need it...
    # Eval-mode copy (train=False) sharing the same parameters.
    vqvae_def_eps = VQVAE(FLAGS.model, train=False)
    vqvae_eps_ts = TrainState.create(vqvae_def_eps, vqvae_params)
    print("Total num of VQVAE parameters:", sum(x.size for x in jax.tree_util.tree_leaves(vqvae_params)))

    discriminator_def = Discriminator(FLAGS.model)
    discriminator_params = discriminator_def.init(param_key, example_obs)['params']
    # tx = optax.adam(learning_rate=FLAGS.model['lr'], b1=FLAGS.model['beta1'], b2=FLAGS.model['beta2'])
    discriminator_ts = TrainState.create(discriminator_def, discriminator_params)#, tx=tx)#No tx again
    print("Total num of Discriminator parameters:", sum(x.size for x in jax.tree_util.tree_leaves(discriminator_params)))

    model = VQGANModel(rng=rng, vqvae=vqvae_ts, vqvae_eps=vqvae_eps_ts, discriminator=discriminator_ts, config=FLAGS.model)

    assert FLAGS.load_dir is not None
    cp = Checkpoint(FLAGS.load_dir)
    model = cp.load_model(model)
    print("Loaded model with step", model.vqvae.step)

    # Replicate across local devices for the model's pmapped methods.
    model = flax.jax_utils.replicate(model, devices=jax.local_devices())
    jax.debug.visualize_array_sharding(model.vqvae.params['decoder']['Conv_0']['bias'])
    #print(model.vqvae)


    ####################################
    # Noise stuff
    ###################################

    cpus = jax.devices("cpu")

    #So there are a few ways to calculate PPL here
    #We could take two images in image space
    #Walk between them and check the LPIPS in the output space
    #...actually that's basically it right?
    #We could also do the walk in latent space, which is the same, but with ?? scaling

    #Let's see if they are any different.


    #We could also try taking a latent, going X/2 direction, and -X/2 direction, and seeing that.
    i = 0
    lpips_list = []
    means = []
    stds = []
    for valid_images in dataset_valid:


        valid_images = valid_images.reshape((len(jax.local_devices()), -1, *valid_images.shape[1:])) # [devices, batch//devices, etc..]
        #1, 2, 256, 256, 3
        #Given our 2 images, we want to lerp between them...
        #We want to lerp once to point t, and once to point t + eps
        #And then we want to get the LPIPS between those two images
        #And then we calculate LPIPS
        #And then we divide by eps squared, and done.


        # NOTE(review): reconstruction_ppl lives in train.py; presumably
        # `decoded` is the decode of the eps-perturbed latent — confirm there.
        reconstructed_images, decoded, std, latents = model.reconstruction_ppl(valid_images) # [devices, 8, 256, 256, 3]


        means.append(latents.mean())
        stds.append(latents.std())
        print("noise added", std.mean())
        print("latent mean", latents.mean())
        print("actual latent std", latents.std())

        #Need to change images back to -1,1

        reconstructed_images = reconstructed_images * 2 - 1
        decoded = decoded * 2 -1

        #1,2,256,256,3
        # Move channels to the front (NCHW) for the PyTorch LPIPS network.
        reconstructed_images = jnp.swapaxes(reconstructed_images, 0, 4)
        decoded = jnp.swapaxes(decoded, 0, 4)

        reconstructed_images = jnp.swapaxes(reconstructed_images, 0, 1)
        decoded = jnp.swapaxes(decoded, 0, 1)

        reconstructed_images = jnp.squeeze(reconstructed_images)
        decoded = jnp.squeeze(decoded)

        #So here, we want to put them on CPU and delete the original


        image_np = np.asarray(reconstructed_images)
        image_np_2 = torch.from_numpy(np.copy(image_np)).cuda()

        decoded_np = np.asarray(decoded)
        decoded_np_2 = torch.from_numpy(np.copy(decoded_np)).cuda()



        lpips_loss = loss_fn_alex(image_np_2, decoded_np_2)
        lpips_cpu = lpips_loss.detach().cpu().squeeze().mean()
        # PPL normalization: divide by eps^2 (eps = 1e-4).
        lpips_cpu = lpips_cpu / (.0001 ** 2)

        print(lpips_cpu)
        lpips_list.append(lpips_cpu)


        i += 1
        #
        if i == 500:
            break


    mean_lpips = jnp.mean(jnp.asarray(lpips_list))
    std_lpips = jnp.std(jnp.asarray(lpips_list))
    print("PPL", mean_lpips)
    print("C std", std_lpips)

    print("mean of means", jnp.asarray(means).mean())
    print("stds of means", jnp.asarray(means).std())
    print("mean of stds", jnp.asarray(stds).mean())
    print("std of stds", jnp.asarray(stds).std())


    # Recorded results from past runs (per checkpoint / KL weight):
    #ae sym
    # mean of means 0.35234922
    # stds of means 0.4036692
    # mean of stds 2.6363409
    # std of stds 0.30666474


    #1e-6:
    #mean of means -0.018107202
    # stds of means 0.11694455
    # mean of stds 1.0860059
    # std of stds 0.09732369
    #average noise added around .03

    #1e-5:
    # mean of means 0.0065166513
    # stds of means 0.06983645
    # mean of stds 0.9855982
    # std of stds 0.05810356

    #1e-4:
    # PPL 8.167942
    # C std 1.7576017
    # mean of means 0.0065882676
    # stds of means 0.042861093
    # mean of stds 0.7608507
    # std of stds 0.05846726
    #Average noise added???



    #pl300
    #PPL 3.5399284
    #C std 0.45380986
    # mean of means 0.090131655
    # stds of means 0.69894844
    # mean of stds 5.5634923
    # std of stds 0.6767279


    #pl100
    # PPL 3.6192155
    # C std 0.47185272
    # mean of means 0.16227543
    # stds of means 0.53616405
    # mean of stds 4.4914503
    # std of stds 0.6015057

    #kl2 noise thing
    # PPL 1.2598925
    # C std 0.26455516
    # mean of means -0.013443217
    # stds of means 1.5238239
    # mean of stds 40.043938
    # std of stds 1.7931403
| 306 |
+
if __name__ == '__main__':
|
| 307 |
+
app.run(main)
|
f16c16/ppl_latents2.py
ADDED
|
@@ -0,0 +1,283 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
try: # For debugging
|
| 2 |
+
from localutils.debugger import enable_debug
|
| 3 |
+
enable_debug()
|
| 4 |
+
except ImportError:
|
| 5 |
+
pass
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
#import jax
|
| 9 |
+
#jax.config.update('jax_platform_name', 'cpu')
|
| 10 |
+
import os
|
| 11 |
+
# os.environ["JAX_PLATFORMS"] = 'cpu'
|
| 12 |
+
import jax
|
| 13 |
+
import lpips
|
| 14 |
+
|
| 15 |
+
loss_fn_alex = lpips.LPIPS(net='alex') # best forward scores
|
| 16 |
+
loss_fn_alex = loss_fn_alex.cuda()
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
import numpy as np
|
| 20 |
+
import flax.linen as nn
|
| 21 |
+
import jax.numpy as jnp
|
| 22 |
+
from absl import app, flags
|
| 23 |
+
from functools import partial
|
| 24 |
+
import numpy as np
|
| 25 |
+
import tqdm
|
| 26 |
+
import flax
|
| 27 |
+
import optax
|
| 28 |
+
import wandb
|
| 29 |
+
from ml_collections import config_flags
|
| 30 |
+
#import elements
|
| 31 |
+
import ml_collections
|
| 32 |
+
import tensorflow_datasets as tfds
|
| 33 |
+
import tensorflow as tf
|
| 34 |
+
tf.config.set_visible_devices([], "GPU")
|
| 35 |
+
tf.config.set_visible_devices([], "TPU")
|
| 36 |
+
import matplotlib.pyplot as plt
|
| 37 |
+
from typing import Any
|
| 38 |
+
|
| 39 |
+
from utils.train_state import TrainState, target_update
|
| 40 |
+
from utils.checkpoint import Checkpoint
|
| 41 |
+
from utils.fid import get_fid_network, fid_from_stats
|
| 42 |
+
|
| 43 |
+
from train import VQGANModel
|
| 44 |
+
from models.vqvae import VQVAE
|
| 45 |
+
from models.discriminator import Discriminator
|
| 46 |
+
|
| 47 |
+
from PIL import Image
|
| 48 |
+
import torch
|
| 49 |
+
|
| 50 |
+
delattr(flags.FLAGS, 'dataset_name')
|
| 51 |
+
delattr(flags.FLAGS, 'load_dir')
|
| 52 |
+
delattr(flags.FLAGS, 'batch_size')
|
| 53 |
+
|
| 54 |
+
FLAGS = flags.FLAGS
|
| 55 |
+
flags.DEFINE_string('dataset_name', 'imagenet256', 'Environment name.')
|
| 56 |
+
flags.DEFINE_string('load_dir', "/home/dkaplan/Downloads/Models/checkpoint(1).tmp", 'Load dir (if not None, load params from here).')
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
flags.DEFINE_integer('batch_size', 2, 'Total Batch size.')
|
| 60 |
+
# Flags are inhereited from train.py, so pass your model parameters again here to evaluate.
|
| 61 |
+
|
| 62 |
+
import gc
|
| 63 |
+
|
| 64 |
+
def main(_):
|
| 65 |
+
device_count = len(jax.local_devices())
|
| 66 |
+
global_device_count = jax.device_count()
|
| 67 |
+
local_batch_size = FLAGS.batch_size // (global_device_count // device_count)
|
| 68 |
+
|
| 69 |
+
def get_dataset(is_train):
|
| 70 |
+
if 'imagenet' in FLAGS.dataset_name:
|
| 71 |
+
def deserialization_fn(data):
|
| 72 |
+
image = data['image']
|
| 73 |
+
min_side = tf.minimum(tf.shape(image)[0], tf.shape(image)[1])
|
| 74 |
+
image = tf.image.resize_with_crop_or_pad(image, min_side, min_side)
|
| 75 |
+
if 'imagenet256' in FLAGS.dataset_name:
|
| 76 |
+
image = tf.image.resize(image, (256, 256))
|
| 77 |
+
elif 'imagenet128' in FLAGS.dataset_name:
|
| 78 |
+
image = tf.image.resize(image, (128, 128))
|
| 79 |
+
else:
|
| 80 |
+
raise ValueError(f"Unknown dataset {FLAGS.dataset_name}")
|
| 81 |
+
if is_train:
|
| 82 |
+
image = tf.image.random_flip_left_right(image)
|
| 83 |
+
image = tf.cast(image, tf.float32) / 255.0
|
| 84 |
+
return image
|
| 85 |
+
|
| 86 |
+
split = tfds.split_for_jax_process('train' if is_train else 'validation', drop_remainder=True)
|
| 87 |
+
dataset = tfds.load('imagenet2012', data_dir="/data/inet", split=split)
|
| 88 |
+
dataset = dataset.map(deserialization_fn, num_parallel_calls=tf.data.AUTOTUNE)
|
| 89 |
+
dataset = dataset.shuffle(10000, seed=42, reshuffle_each_iteration=True)
|
| 90 |
+
dataset = dataset.batch(local_batch_size)
|
| 91 |
+
dataset = dataset.prefetch(tf.data.AUTOTUNE)
|
| 92 |
+
dataset = tfds.as_numpy(dataset)
|
| 93 |
+
dataset = iter(dataset)
|
| 94 |
+
return dataset
|
| 95 |
+
else:
|
| 96 |
+
raise ValueError(f"Unknown dataset {FLAGS.dataset_name}")
|
| 97 |
+
|
| 98 |
+
dataset = get_dataset(is_train=True)
|
| 99 |
+
dataset_valid = get_dataset(is_train=False)
|
| 100 |
+
|
| 101 |
+
|
| 102 |
+
# image = Image.open("osman.png")
|
| 103 |
+
# image = np.array(image) / 255.0
|
| 104 |
+
# print(image)
|
| 105 |
+
# image = jnp.array(image)
|
| 106 |
+
# image = jnp.expand_dims(image, 0)
|
| 107 |
+
# image = jnp.expand_dims(image, 0)
|
| 108 |
+
|
| 109 |
+
example_obs = next(dataset)[:1]
|
| 110 |
+
|
| 111 |
+
#Reconstruction loop
|
| 112 |
+
# image = model.reconstruction(image)
|
| 113 |
+
# image = image[0,0,:,:,:]
|
| 114 |
+
# image = (image * 255).astype(np.uint8)
|
| 115 |
+
# image = np.array(image)
|
| 116 |
+
# img = Image.fromarray(image)
|
| 117 |
+
# img.save("osman" + str(i) + ".png")
|
| 118 |
+
|
| 119 |
+
|
| 120 |
+
rng = jax.random.PRNGKey(FLAGS.seed)
|
| 121 |
+
rng, param_key = jax.random.split(rng)
|
| 122 |
+
print("Total devices", jax.local_devices()[0])
|
| 123 |
+
|
| 124 |
+
|
| 125 |
+
###################################
|
| 126 |
+
# Creating Model and put on devices.
|
| 127 |
+
###################################
|
| 128 |
+
FLAGS.model.image_channels = example_obs.shape[-1]
|
| 129 |
+
FLAGS.model.image_size = example_obs.shape[1]
|
| 130 |
+
vqvae_def = VQVAE(FLAGS.model, train=True)
|
| 131 |
+
vqvae_params = vqvae_def.init({'params': param_key, 'noise': param_key}, example_obs)['params']
|
| 132 |
+
# tx = optax.adam(learning_rate=FLAGS.model['lr'], b1=FLAGS.model['beta1'], b2=FLAGS.model['beta2'])
|
| 133 |
+
vqvae_ts = TrainState.create(vqvae_def, vqvae_params)#, tx=tx) #Turning off tx because we don't need it...
|
| 134 |
+
vqvae_def_eps = VQVAE(FLAGS.model, train=False)
|
| 135 |
+
vqvae_eps_ts = TrainState.create(vqvae_def_eps, vqvae_params)
|
| 136 |
+
print("Total num of VQVAE parameters:", sum(x.size for x in jax.tree_util.tree_leaves(vqvae_params)))
|
| 137 |
+
|
| 138 |
+
discriminator_def = Discriminator(FLAGS.model)
|
| 139 |
+
discriminator_params = discriminator_def.init(param_key, example_obs)['params']
|
| 140 |
+
# tx = optax.adam(learning_rate=FLAGS.model['lr'], b1=FLAGS.model['beta1'], b2=FLAGS.model['beta2'])
|
| 141 |
+
discriminator_ts = TrainState.create(discriminator_def, discriminator_params)#, tx=tx)#No tx again
|
| 142 |
+
print("Total num of Discriminator parameters:", sum(x.size for x in jax.tree_util.tree_leaves(discriminator_params)))
|
| 143 |
+
|
| 144 |
+
model = VQGANModel(rng=rng, vqvae=vqvae_ts, vqvae_eps=vqvae_eps_ts, discriminator=discriminator_ts, config=FLAGS.model)
|
| 145 |
+
|
| 146 |
+
assert FLAGS.load_dir is not None
|
| 147 |
+
cp = Checkpoint(FLAGS.load_dir)
|
| 148 |
+
model = cp.load_model(model)
|
| 149 |
+
print("Loaded model with step", model.vqvae.step)
|
| 150 |
+
|
| 151 |
+
model = flax.jax_utils.replicate(model, devices=jax.local_devices())
|
| 152 |
+
jax.debug.visualize_array_sharding(model.vqvae.params['decoder']['Conv_0']['bias'])
|
| 153 |
+
#print(model.vqvae)
|
| 154 |
+
|
| 155 |
+
|
| 156 |
+
####################################
|
| 157 |
+
# Noise stuff
|
| 158 |
+
###################################
|
| 159 |
+
|
| 160 |
+
cpus = jax.devices("cpu")
|
| 161 |
+
|
| 162 |
+
#So there are a few ways to calculate PPL here
|
| 163 |
+
#We could take two images in image space
|
| 164 |
+
#Walk between them and check the LPIPS in the output space
|
| 165 |
+
#...actually that's basically it right?
|
| 166 |
+
#We could also do the walk in latent space, which is the same, but with ?? scaling
|
| 167 |
+
|
| 168 |
+
#Let's see if they are any different.
|
| 169 |
+
|
| 170 |
+
|
| 171 |
+
#We could also try taking a latent, going X/2 direction, and -X/2 direction, and seeing that.
|
| 172 |
+
i = 0
|
| 173 |
+
lpips_list = []
|
| 174 |
+
means = []
|
| 175 |
+
stds = []
|
| 176 |
+
for valid_images in dataset_valid:
|
| 177 |
+
|
| 178 |
+
|
| 179 |
+
valid_images = valid_images.reshape((len(jax.local_devices()), -1, *valid_images.shape[1:])) # [devices, batch//devices, etc..]
|
| 180 |
+
#1, 2, 256, 256, 3
|
| 181 |
+
#Given our 2 images, we want to lerp between them...
|
| 182 |
+
#We want to lerp once to point t, and once to point t + eps
|
| 183 |
+
#And then we want to get the LPIPS between those two images
|
| 184 |
+
#And then we calculate LPIPS
|
| 185 |
+
#And then we divide by eps squared, and done.
|
| 186 |
+
|
| 187 |
+
|
| 188 |
+
reconstructed_images, decoded, std, latents, decoded_2 = model.reconstruction_ppl_two(valid_images) # [devices, 8, 256, 256, 3]
|
| 189 |
+
|
| 190 |
+
|
| 191 |
+
means.append(latents.mean())
|
| 192 |
+
stds.append(latents.std())
|
| 193 |
+
# print("std", std.mean())
|
| 194 |
+
print("latent mean", latents.mean())
|
| 195 |
+
print("actual latent std", latents.std())
|
| 196 |
+
|
| 197 |
+
#Need to change images back to -1,1
|
| 198 |
+
#Why are the images so similar? It's different noises...
|
| 199 |
+
|
| 200 |
+
reconstructed_images = decoded_2 * 2 - 1
|
| 201 |
+
decoded = decoded * 2 -1
|
| 202 |
+
|
| 203 |
+
#1,2,256,256,3
|
| 204 |
+
reconstructed_images = jnp.swapaxes(reconstructed_images, 0, 4)
|
| 205 |
+
decoded = jnp.swapaxes(decoded, 0, 4)
|
| 206 |
+
|
| 207 |
+
reconstructed_images = jnp.swapaxes(reconstructed_images, 0, 1)
|
| 208 |
+
decoded = jnp.swapaxes(decoded, 0, 1)
|
| 209 |
+
|
| 210 |
+
reconstructed_images = jnp.squeeze(reconstructed_images)
|
| 211 |
+
decoded = jnp.squeeze(decoded)
|
| 212 |
+
|
| 213 |
+
#So here, we want to put them on CPU and delete the original
|
| 214 |
+
|
| 215 |
+
|
| 216 |
+
image_np = np.asarray(reconstructed_images)
|
| 217 |
+
image_np_2 = torch.from_numpy(np.copy(image_np)).cuda()
|
| 218 |
+
|
| 219 |
+
decoded_np = np.asarray(decoded)
|
| 220 |
+
decoded_np_2 = torch.from_numpy(np.copy(decoded_np)).cuda()
|
| 221 |
+
|
| 222 |
+
|
| 223 |
+
|
| 224 |
+
lpips_loss = loss_fn_alex(image_np_2, decoded_np_2)
|
| 225 |
+
lpips_cpu = lpips_loss.detach().cpu().squeeze().mean()
|
| 226 |
+
lpips_cpu = lpips_cpu / (.0001 ** 2)
|
| 227 |
+
|
| 228 |
+
print(lpips_cpu)
|
| 229 |
+
lpips_list.append(lpips_cpu)
|
| 230 |
+
|
| 231 |
+
|
| 232 |
+
i += 1
|
| 233 |
+
#
|
| 234 |
+
if i == 500:
|
| 235 |
+
break
|
| 236 |
+
|
| 237 |
+
|
| 238 |
+
|
| 239 |
+
|
| 240 |
+
mean_lpips = jnp.mean(jnp.asarray(lpips_list))
|
| 241 |
+
print(mean_lpips)
|
| 242 |
+
print("mean of means", jnp.asarray(means).mean())
|
| 243 |
+
print("stds of means", jnp.asarray(means).std())
|
| 244 |
+
print("mean of stds", jnp.asarray(stds).mean())
|
| 245 |
+
print("std of stds", jnp.asarray(stds).std())
|
| 246 |
+
|
| 247 |
+
#1e-4? 8.1371
|
| 248 |
+
#1e-5 9.0486
|
| 249 |
+
#1e-6 9.7
|
| 250 |
+
|
| 251 |
+
|
| 252 |
+
#ae is a 5.85.....
|
| 253 |
+
|
| 254 |
+
|
| 255 |
+
#1e-4 kl2 1.26
|
| 256 |
+
|
| 257 |
+
|
| 258 |
+
|
| 259 |
+
|
| 260 |
+
|
| 261 |
+
#1e-6 is 9.8
|
| 262 |
+
#1e-5 is 9.09
|
| 263 |
+
#2e-5 is ..... between these. hopefully. 8.83
|
| 264 |
+
#1e-4 is 8.16
|
| 265 |
+
#ae (sym) is 5.87 right now, somehow.
|
| 266 |
+
|
| 267 |
+
|
| 268 |
+
#basicallly ae 5.56, then 4.95?
|
| 269 |
+
|
| 270 |
+
|
| 271 |
+
#PL100 is 3.6
|
| 272 |
+
#Pl300 is 3.53
|
| 273 |
+
#Pl600 is... 3.97
|
| 274 |
+
|
| 275 |
+
|
| 276 |
+
#So the kl level barely matters it seems.
|
| 277 |
+
#We might want to try MMD + noise, but it also barely matters I think
|
| 278 |
+
#1e-4 was 1.25
|
| 279 |
+
#5e-5 was 1.225
|
| 280 |
+
#kl2 was like super duper low, forgot to save it lol. 1.17 maybe?
|
| 281 |
+
|
| 282 |
+
if __name__ == '__main__':
|
| 283 |
+
app.run(main)
|
f16c16/stats.py
ADDED
|
@@ -0,0 +1,362 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
try: # For debugging
|
| 2 |
+
from localutils.debugger import enable_debug
|
| 3 |
+
enable_debug()
|
| 4 |
+
except ImportError:
|
| 5 |
+
pass
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
#import jax
|
| 9 |
+
#jax.config.update('jax_platform_name', 'cpu')
|
| 10 |
+
import os
|
| 11 |
+
# os.environ["JAX_PLATFORMS"] = 'cpu'
|
| 12 |
+
import jax
|
| 13 |
+
import lpips
|
| 14 |
+
|
| 15 |
+
loss_fn_alex = lpips.LPIPS(net='alex') # best forward scores
|
| 16 |
+
loss_fn_alex = loss_fn_alex.cuda()
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
import numpy as np
|
| 20 |
+
import flax.linen as nn
|
| 21 |
+
import jax.numpy as jnp
|
| 22 |
+
from absl import app, flags
|
| 23 |
+
from functools import partial
|
| 24 |
+
import numpy as np
|
| 25 |
+
import tqdm
|
| 26 |
+
import flax
|
| 27 |
+
import optax
|
| 28 |
+
import wandb
|
| 29 |
+
from ml_collections import config_flags
|
| 30 |
+
#import elements
|
| 31 |
+
import ml_collections
|
| 32 |
+
import tensorflow_datasets as tfds
|
| 33 |
+
import tensorflow as tf
|
| 34 |
+
tf.config.set_visible_devices([], "GPU")
|
| 35 |
+
tf.config.set_visible_devices([], "TPU")
|
| 36 |
+
import matplotlib.pyplot as plt
|
| 37 |
+
from typing import Any
|
| 38 |
+
|
| 39 |
+
from utils.train_state import TrainState, target_update
|
| 40 |
+
from utils.checkpoint import Checkpoint
|
| 41 |
+
from utils.fid import get_fid_network, fid_from_stats
|
| 42 |
+
|
| 43 |
+
from train import VQGANModel
|
| 44 |
+
from models.vqvae import VQVAE
|
| 45 |
+
from models.discriminator import Discriminator
|
| 46 |
+
|
| 47 |
+
from PIL import Image
|
| 48 |
+
import torch
|
| 49 |
+
|
| 50 |
+
delattr(flags.FLAGS, 'dataset_name')
|
| 51 |
+
delattr(flags.FLAGS, 'load_dir')
|
| 52 |
+
delattr(flags.FLAGS, 'batch_size')
|
| 53 |
+
|
| 54 |
+
FLAGS = flags.FLAGS
|
| 55 |
+
flags.DEFINE_string('dataset_name', 'imagenet256', 'Environment name.')
|
| 56 |
+
flags.DEFINE_string('load_dir', "/home/dkaplan/Downloads/Models/checkpoint(1).tmp", 'Load dir (if not None, load params from here).')
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
flags.DEFINE_integer('batch_size', 2, 'Total Batch size.')
|
| 60 |
+
# Flags are inhereited from train.py, so pass your model parameters again here to evaluate.
|
| 61 |
+
|
| 62 |
+
import gc
|
| 63 |
+
|
| 64 |
+
def main(_):
|
| 65 |
+
|
| 66 |
+
device_count = len(jax.local_devices())
|
| 67 |
+
global_device_count = jax.device_count()
|
| 68 |
+
local_batch_size = FLAGS.batch_size // (global_device_count // device_count)
|
| 69 |
+
|
| 70 |
+
def get_dataset(is_train):
|
| 71 |
+
if 'imagenet' in FLAGS.dataset_name:
|
| 72 |
+
def deserialization_fn(data):
|
| 73 |
+
image = data['image']
|
| 74 |
+
min_side = tf.minimum(tf.shape(image)[0], tf.shape(image)[1])
|
| 75 |
+
image = tf.image.resize_with_crop_or_pad(image, min_side, min_side)
|
| 76 |
+
if 'imagenet256' in FLAGS.dataset_name:
|
| 77 |
+
image = tf.image.resize(image, (256, 256))
|
| 78 |
+
elif 'imagenet128' in FLAGS.dataset_name:
|
| 79 |
+
image = tf.image.resize(image, (128, 128))
|
| 80 |
+
else:
|
| 81 |
+
raise ValueError(f"Unknown dataset {FLAGS.dataset_name}")
|
| 82 |
+
if is_train:
|
| 83 |
+
image = tf.image.random_flip_left_right(image)
|
| 84 |
+
image = tf.cast(image, tf.float32) / 255.0
|
| 85 |
+
return image
|
| 86 |
+
|
| 87 |
+
split = tfds.split_for_jax_process('train' if is_train else 'validation', drop_remainder=True)
|
| 88 |
+
dataset = tfds.load('imagenet2012', data_dir="/data/inet", split=split)
|
| 89 |
+
dataset = dataset.map(deserialization_fn, num_parallel_calls=tf.data.AUTOTUNE)
|
| 90 |
+
dataset = dataset.shuffle(10000, seed=42, reshuffle_each_iteration=True)
|
| 91 |
+
dataset = dataset.batch(local_batch_size)
|
| 92 |
+
dataset = dataset.prefetch(tf.data.AUTOTUNE)
|
| 93 |
+
dataset = tfds.as_numpy(dataset)
|
| 94 |
+
dataset = iter(dataset)
|
| 95 |
+
return dataset
|
| 96 |
+
else:
|
| 97 |
+
raise ValueError(f"Unknown dataset {FLAGS.dataset_name}")
|
| 98 |
+
|
| 99 |
+
dataset = get_dataset(is_train=True)
|
| 100 |
+
dataset_valid = get_dataset(is_train=False)
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
# image = Image.open("osman.png")
|
| 104 |
+
# image = np.array(image) / 255.0
|
| 105 |
+
# print(image)
|
| 106 |
+
# image = jnp.array(image)
|
| 107 |
+
# image = jnp.expand_dims(image, 0)
|
| 108 |
+
# image = jnp.expand_dims(image, 0)
|
| 109 |
+
|
| 110 |
+
example_obs = next(dataset)[:1]
|
| 111 |
+
|
| 112 |
+
#Reconstruction loop
|
| 113 |
+
# image = model.reconstruction(image)
|
| 114 |
+
# image = image[0,0,:,:,:]
|
| 115 |
+
# image = (image * 255).astype(np.uint8)
|
| 116 |
+
# image = np.array(image)
|
| 117 |
+
# img = Image.fromarray(image)
|
| 118 |
+
# img.save("osman" + str(i) + ".png")
|
| 119 |
+
|
| 120 |
+
|
| 121 |
+
rng = jax.random.PRNGKey(FLAGS.seed)
|
| 122 |
+
rng, param_key = jax.random.split(rng)
|
| 123 |
+
print("Total devices", jax.local_devices()[0])
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
###################################
|
| 127 |
+
# Creating Model and put on devices.
|
| 128 |
+
###################################
|
| 129 |
+
FLAGS.model.image_channels = example_obs.shape[-1]
|
| 130 |
+
FLAGS.model.image_size = example_obs.shape[1]
|
| 131 |
+
vqvae_def = VQVAE(FLAGS.model, train=True)
|
| 132 |
+
vqvae_params = vqvae_def.init({'params': param_key, 'noise': param_key}, example_obs)['params']
|
| 133 |
+
# tx = optax.adam(learning_rate=FLAGS.model['lr'], b1=FLAGS.model['beta1'], b2=FLAGS.model['beta2'])
|
| 134 |
+
vqvae_ts = TrainState.create(vqvae_def, vqvae_params)#, tx=tx) #Turning off tx because we don't need it...
|
| 135 |
+
vqvae_def_eps = VQVAE(FLAGS.model, train=False)
|
| 136 |
+
vqvae_eps_ts = TrainState.create(vqvae_def_eps, vqvae_params)
|
| 137 |
+
print("Total num of VQVAE parameters:", sum(x.size for x in jax.tree_util.tree_leaves(vqvae_params)))
|
| 138 |
+
|
| 139 |
+
discriminator_def = Discriminator(FLAGS.model)
|
| 140 |
+
discriminator_params = discriminator_def.init(param_key, example_obs)['params']
|
| 141 |
+
# tx = optax.adam(learning_rate=FLAGS.model['lr'], b1=FLAGS.model['beta1'], b2=FLAGS.model['beta2'])
|
| 142 |
+
discriminator_ts = TrainState.create(discriminator_def, discriminator_params)#, tx=tx)#No tx again
|
| 143 |
+
print("Total num of Discriminator parameters:", sum(x.size for x in jax.tree_util.tree_leaves(discriminator_params)))
|
| 144 |
+
|
| 145 |
+
model = VQGANModel(rng=rng, vqvae=vqvae_ts, vqvae_eps=vqvae_eps_ts, discriminator=discriminator_ts, config=FLAGS.model)
|
| 146 |
+
|
| 147 |
+
assert FLAGS.load_dir is not None
|
| 148 |
+
cp = Checkpoint(FLAGS.load_dir)
|
| 149 |
+
model = cp.load_model(model)
|
| 150 |
+
print("Loaded model with step", model.vqvae.step)
|
| 151 |
+
|
| 152 |
+
model = flax.jax_utils.replicate(model, devices=jax.local_devices())
|
| 153 |
+
jax.debug.visualize_array_sharding(model.vqvae.params['decoder']['Conv_0']['bias'])
|
| 154 |
+
#print(model.vqvae)
|
| 155 |
+
|
| 156 |
+
|
| 157 |
+
####################################
|
| 158 |
+
# Noise stuff
|
| 159 |
+
###################################
|
| 160 |
+
|
| 161 |
+
#on the other end also.
|
| 162 |
+
noises = []
|
| 163 |
+
|
| 164 |
+
numbers = np.arange(0.00, 1.0, 0.01)
|
| 165 |
+
|
| 166 |
+
for number in numbers:
|
| 167 |
+
noises.append(float(number))
|
| 168 |
+
|
| 169 |
+
# numbers = np.arange(.4, 3, .5)
|
| 170 |
+
# for number in numbers:
|
| 171 |
+
# noises.append(float(number))
|
| 172 |
+
|
| 173 |
+
i = 0
|
| 174 |
+
l2_dict = {noise: [] for noise in noises}
|
| 175 |
+
lpips_dict = {noise: [] for noise in noises}
|
| 176 |
+
snr_dict = {noise: [] for noise in noises}
|
| 177 |
+
|
| 178 |
+
cpus = jax.devices("cpu")
|
| 179 |
+
print(noises)
|
| 180 |
+
|
| 181 |
+
|
| 182 |
+
for valid_images in dataset_valid:
|
| 183 |
+
print(i)
|
| 184 |
+
valid_images = valid_images.reshape((len(jax.local_devices()), -1, *valid_images.shape[1:])) # [devices, batch//devices, etc..]
|
| 185 |
+
|
| 186 |
+
# valid_reconstructed_images = model.reconstruction(valid_images) # [devices, 8, 256, 256, 3]
|
| 187 |
+
|
| 188 |
+
valid_reconstructed_images, noisy_reconstructed_images, std = model.reconstruction_noisy(valid_images)
|
| 189 |
+
print(std.mean())
|
| 190 |
+
|
| 191 |
+
|
| 192 |
+
# valid_reconstructed_images, noisy_reconstructed_images = model.reconstruction_sampling(valid_images) # [devices, 8, 256, 256, 3]
|
| 193 |
+
|
| 194 |
+
|
| 195 |
+
|
| 196 |
+
# print(latents)
|
| 197 |
+
#Calculate MSE between valid and noisy.
|
| 198 |
+
if True:
|
| 199 |
+
for noise, decoded in zip(noises, noisy_reconstructed_images):
|
| 200 |
+
image, snr = decoded
|
| 201 |
+
snr = snr.mean()#So this gives us the snr for a given noise level. need to mean it..
|
| 202 |
+
snr_dict[noise].append(snr)
|
| 203 |
+
#So we put it into the noise list.
|
| 204 |
+
|
| 205 |
+
# print("snr", snr)
|
| 206 |
+
l2 = jnp.mean((valid_reconstructed_images - image) ** 2)
|
| 207 |
+
l2_cpu = jax.device_put(l2, cpus[0])
|
| 208 |
+
l2_dict[noise].append(l2_cpu)
|
| 209 |
+
|
| 210 |
+
#Need to change images back to -1,1
|
| 211 |
+
|
| 212 |
+
image = image * 2 - 1
|
| 213 |
+
valid_rescaled = valid_reconstructed_images * 2 -1
|
| 214 |
+
|
| 215 |
+
#1,2,256,256,3
|
| 216 |
+
image = jnp.swapaxes(image, 0, 4)
|
| 217 |
+
valid_rescaled = jnp.swapaxes(valid_rescaled, 0, 4)
|
| 218 |
+
|
| 219 |
+
image = jnp.swapaxes(image, 0, 1)
|
| 220 |
+
valid_rescaled = jnp.swapaxes(valid_rescaled, 0, 1)
|
| 221 |
+
|
| 222 |
+
image = jnp.squeeze(image)
|
| 223 |
+
valid_rescaled = jnp.squeeze(valid_rescaled)
|
| 224 |
+
|
| 225 |
+
#So here, we want to put them on CPU and delete the original
|
| 226 |
+
|
| 227 |
+
|
| 228 |
+
image_np = np.asarray(image)
|
| 229 |
+
image_np_2 = torch.from_numpy(np.copy(image_np)).cuda()
|
| 230 |
+
|
| 231 |
+
|
| 232 |
+
#Can be run only once if needd
|
| 233 |
+
valid_rescaled_np = np.asarray(valid_rescaled)
|
| 234 |
+
valid_rescaled_np_2 = torch.from_numpy(np.copy(valid_rescaled_np)).cuda()
|
| 235 |
+
|
| 236 |
+
|
| 237 |
+
|
| 238 |
+
lpips_loss = loss_fn_alex(valid_rescaled_np_2, image_np_2)
|
| 239 |
+
lpips_cpu = lpips_loss.detach().cpu().squeeze().mean()
|
| 240 |
+
lpips_dict[noise].append(lpips_cpu)
|
| 241 |
+
elif False:#Check l2 and lpips on our 2 images..
|
| 242 |
+
|
| 243 |
+
l2 = jnp.mean((valid_reconstructed_images - noisy_reconstructed_images) ** 2)
|
| 244 |
+
l2_cpu = jax.device_put(l2, cpus[0])
|
| 245 |
+
print("L2", l2_cpu)
|
| 246 |
+
|
| 247 |
+
#Need to change images back to -1,1
|
| 248 |
+
valid_reconstructed_images = valid_reconstructed_images * 2 - 1
|
| 249 |
+
noisy_reconstructed_images = noisy_reconstructed_images * 2 -1
|
| 250 |
+
|
| 251 |
+
#1,2,256,256,3
|
| 252 |
+
valid_reconstructed_images = jnp.swapaxes(valid_reconstructed_images, 0, 4)
|
| 253 |
+
noisy_reconstructed_images = jnp.swapaxes(noisy_reconstructed_images, 0, 4)
|
| 254 |
+
|
| 255 |
+
valid_reconstructed_images = jnp.swapaxes(valid_reconstructed_images, 0, 1)
|
| 256 |
+
noisy_reconstructed_images = jnp.swapaxes(noisy_reconstructed_images, 0, 1)
|
| 257 |
+
|
| 258 |
+
valid_reconstructed_images = jnp.squeeze(valid_reconstructed_images)
|
| 259 |
+
noisy_reconstructed_images = jnp.squeeze(noisy_reconstructed_images)
|
| 260 |
+
|
| 261 |
+
#So here, we want to put them on CPU and delete the original
|
| 262 |
+
|
| 263 |
+
|
| 264 |
+
image_np = np.asarray(valid_reconstructed_images)
|
| 265 |
+
image_np_2 = torch.from_numpy(np.copy(image_np)).cuda()
|
| 266 |
+
|
| 267 |
+
valid_rescaled_np = np.asarray(noisy_reconstructed_images)
|
| 268 |
+
valid_rescaled_np_2 = torch.from_numpy(np.copy(valid_rescaled_np)).cuda()
|
| 269 |
+
|
| 270 |
+
|
| 271 |
+
|
| 272 |
+
lpips_loss = loss_fn_alex(valid_rescaled_np_2, image_np_2)
|
| 273 |
+
lpips_cpu = lpips_loss.detach().cpu().squeeze().mean()
|
| 274 |
+
print("Lpips", lpips_cpu)
|
| 275 |
+
|
| 276 |
+
|
| 277 |
+
if False:
|
| 278 |
+
image = valid_images[0,0,:,:,:]
|
| 279 |
+
image = (image * 255).astype(np.uint8)
|
| 280 |
+
img = Image.fromarray(image)
|
| 281 |
+
img.save("original" + str(i) + ".png")
|
| 282 |
+
|
| 283 |
+
image2 = valid_reconstructed_images[0,0,:,:,:]
|
| 284 |
+
image2 = (image2 * 255).astype(np.uint8)
|
| 285 |
+
image2 = np.array(image2)
|
| 286 |
+
image2 = Image.fromarray(image2)
|
| 287 |
+
image2.save("recon" + str(i) + ".png")
|
| 288 |
+
|
| 289 |
+
|
| 290 |
+
#Needs [0] if list
|
| 291 |
+
# image3 = noisy_reconstructed_images[0][0,0,:,:,:]
|
| 292 |
+
|
| 293 |
+
image3 = noisy_reconstructed_images[2][0,0,:,:,:]
|
| 294 |
+
image3 = (image3 * 255).astype(np.uint8)
|
| 295 |
+
image3 = np.array(image3)
|
| 296 |
+
image3 = Image.fromarray(image3)
|
| 297 |
+
image3.save("noisy_recon_0_" + str(i) + ".png")
|
| 298 |
+
|
| 299 |
+
image4 = noisy_reconstructed_images[-1][0,0,:,:,:]
|
| 300 |
+
image4 = (image4 * 255).astype(np.uint8)
|
| 301 |
+
image4 = np.array(image4)
|
| 302 |
+
image4 = Image.fromarray(image4)
|
| 303 |
+
image4.save("noisy_recon_last_" + str(i) + ".png")
|
| 304 |
+
|
| 305 |
+
# del valid_images
|
| 306 |
+
# del valid_reconstructed_images
|
| 307 |
+
# del noisy_reconstructed_images
|
| 308 |
+
|
| 309 |
+
# gc.collect()
|
| 310 |
+
# torch.cuda.empty_cache()
|
| 311 |
+
i += 1
|
| 312 |
+
#
|
| 313 |
+
if i == 50:
|
| 314 |
+
break
|
| 315 |
+
#Now we have our l2 set.
|
| 316 |
+
|
| 317 |
+
mean_l2_dict = {noise: jnp.mean(jnp.asarray(l2_values)) for noise, l2_values in l2_dict.items()}
|
| 318 |
+
std_l2_dict = {noise: jnp.std(jnp.asarray(l2_values)) for noise, l2_values in l2_dict.items()}
|
| 319 |
+
for noise, mean_l2 in mean_l2_dict.items():
|
| 320 |
+
print(f"Mean L2 for noise {noise}: {mean_l2}")
|
| 321 |
+
|
| 322 |
+
mean_lpips_dict = {noise: torch.mean(torch.tensor(lpips_values)) for noise, lpips_values in lpips_dict.items()}
|
| 323 |
+
std_lpips_dict = {noise: torch.std(torch.tensor(lpips_values)) for noise, lpips_values in lpips_dict.items()}
|
| 324 |
+
for noise, mean_lpips in mean_lpips_dict.items():
|
| 325 |
+
print(f"Mean Lpips for noise {noise}: {mean_lpips}")
|
| 326 |
+
|
| 327 |
+
mean_snr_dict = {noise: jnp.mean(jnp.asarray(snr_values)) for noise, snr_values in snr_dict.items()}
|
| 328 |
+
std_snr_dict = {noise: jnp.std(jnp.asarray(snr_values)) for noise, snr_values in snr_dict.items()}
|
| 329 |
+
for noise, mean_snr in mean_snr_dict.items():
|
| 330 |
+
print(f"Mean SNR for noise {noise}: {mean_snr}")
|
| 331 |
+
|
| 332 |
+
array = []
|
| 333 |
+
for noise, std in std_lpips_dict.items():
|
| 334 |
+
array.append(np.asarray(std).tolist())
|
| 335 |
+
|
| 336 |
+
print(array)
|
| 337 |
+
print(std_lpips_dict)
|
| 338 |
+
print(std_snr_dict)#This tells us the range of SNR for a given image/noise level, which... should be lower...?
|
| 339 |
+
|
| 340 |
+
#pl300
|
| 341 |
+
#it's noise to std of the lpips at that noise, but we need....
|
| 342 |
+
#So our points are mean of the lpips at a noise level
|
| 343 |
+
#Mean of the
|
| 344 |
+
''' PL300
|
| 345 |
+
{0.0: tensor(0.), 0.01: tensor(1.2151e-05), 0.02: tensor(4.2352e-05), 0.03: tensor(8.5722e-05), 0.04: tensor(0.0001), 0.05: tensor(0.0002), 0.06: tensor(0.0003), 0.07: tensor(0.0003), 0.08: tensor(0.0004), 0.09: tensor(0.0005), 0.1: tensor(0.0006), 0.11: tensor(0.0007), 0.12: tensor(0.0008), 0.13: tensor(0.0009), 0.14: tensor(0.0011), 0.15: tensor(0.0012), 0.16: tensor(0.0013), 0.17: tensor(0.0015), 0.18: tensor(0.0016), 0.19: tensor(0.0017), 0.2: tensor(0.0019), 0.21: tensor(0.0020), 0.22: tensor(0.0022), 0.23: tensor(0.0023), 0.24: tensor(0.0025), 0.25: tensor(0.0027), 0.26: tensor(0.0028), 0.27: tensor(0.0030), 0.28: tensor(0.0032), 0.29: tensor(0.0034), 0.3: tensor(0.0036), 0.31: tensor(0.0037), 0.32: tensor(0.0039), 0.33: tensor(0.0041), 0.34: tensor(0.0043), 0.35000000000000003: tensor(0.0045), 0.36: tensor(0.0047), 0.37: tensor(0.0050), 0.38: tensor(0.0052), 0.39: tensor(0.0054), 0.4: tensor(0.0056), 0.41000000000000003: tensor(0.0059), 0.42: tensor(0.0061), 0.43: tensor(0.0063), 0.44: tensor(0.0066), 0.45: tensor(0.0068), 0.46: tensor(0.0070), 0.47000000000000003: tensor(0.0073), 0.48: tensor(0.0075), 0.49: tensor(0.0078), 0.5: tensor(0.0080), 0.51: tensor(0.0083), 0.52: tensor(0.0086), 0.53: tensor(0.0088), 0.54: tensor(0.0091), 0.55: tensor(0.0094), 0.56: tensor(0.0097), 0.5700000000000001: tensor(0.0100), 0.58: tensor(0.0102), 0.59: tensor(0.0105), 0.6: tensor(0.0108), 0.61: tensor(0.0111), 0.62: tensor(0.0114), 0.63: tensor(0.0118), 0.64: tensor(0.0121), 0.65: tensor(0.0124), 0.66: tensor(0.0127), 0.67: tensor(0.0130), 0.68: tensor(0.0133), 0.6900000000000001: tensor(0.0136), 0.7000000000000001: tensor(0.0140), 0.71: tensor(0.0143), 0.72: tensor(0.0146), 0.73: tensor(0.0149), 0.74: tensor(0.0152), 0.75: tensor(0.0156), 0.76: tensor(0.0159), 0.77: tensor(0.0162), 0.78: tensor(0.0166), 0.79: tensor(0.0169), 0.8: tensor(0.0172), 0.81: tensor(0.0176), 0.8200000000000001: tensor(0.0179), 0.8300000000000001: tensor(0.0183), 0.84: tensor(0.0186), 0.85: 
tensor(0.0190), 0.86: tensor(0.0193), 0.87: tensor(0.0197), 0.88: tensor(0.0200), 0.89: tensor(0.0204), 0.9: tensor(0.0208), 0.91: tensor(0.0211), 0.92: tensor(0.0215), 0.93: tensor(0.0218), 0.9400000000000001: tensor(0.0222), 0.9500000000000001: tensor(0.0226), 0.96: tensor(0.0229), 0.97: tensor(0.0233), 0.98: tensor(0.0236), 0.99: tensor(0.0240)}
|
| 346 |
+
1e-4
|
| 347 |
+
{0.0: tensor(0.), 0.01: tensor(7.1912e-05), 0.02: tensor(0.0003), 0.03: tensor(0.0006), 0.04: tensor(0.0009), 0.05: tensor(0.0014), 0.06: tensor(0.0018), 0.07: tensor(0.0023), 0.08: tensor(0.0029), 0.09: tensor(0.0034), 0.1: tensor(0.0039), 0.11: tensor(0.0044), 0.12: tensor(0.0049), 0.13: tensor(0.0054), 0.14: tensor(0.0059), 0.15: tensor(0.0064), 0.16: tensor(0.0070), 0.17: tensor(0.0075), 0.18: tensor(0.0080), 0.19: tensor(0.0085), 0.2: tensor(0.0090), 0.21: tensor(0.0096), 0.22: tensor(0.0101), 0.23: tensor(0.0107), 0.24: tensor(0.0112), 0.25: tensor(0.0118), 0.26: tensor(0.0123), 0.27: tensor(0.0129), 0.28: tensor(0.0135), 0.29: tensor(0.0141), 0.3: tensor(0.0147), 0.31: tensor(0.0153), 0.32: tensor(0.0159), 0.33: tensor(0.0166), 0.34: tensor(0.0173), 0.35000000000000003: tensor(0.0180), 0.36: tensor(0.0187), 0.37: tensor(0.0194), 0.38: tensor(0.0201), 0.39: tensor(0.0207), 0.4: tensor(0.0214), 0.41000000000000003: tensor(0.0221), 0.42: tensor(0.0228), 0.43: tensor(0.0236), 0.44: tensor(0.0243), 0.45: tensor(0.0250), 0.46: tensor(0.0258), 0.47000000000000003: tensor(0.0266), 0.48: tensor(0.0274), 0.49: tensor(0.0282), 0.5: tensor(0.0290), 0.51: tensor(0.0298), 0.52: tensor(0.0305), 0.53: tensor(0.0313), 0.54: tensor(0.0321), 0.55: tensor(0.0328), 0.56: tensor(0.0336), 0.5700000000000001: tensor(0.0344), 0.58: tensor(0.0353), 0.59: tensor(0.0361), 0.6: tensor(0.0370), 0.61: tensor(0.0378), 0.62: tensor(0.0386), 0.63: tensor(0.0395), 0.64: tensor(0.0403), 0.65: tensor(0.0410), 0.66: tensor(0.0417), 0.67: tensor(0.0424), 0.68: tensor(0.0430), 0.6900000000000001: tensor(0.0436), 0.7000000000000001: tensor(0.0442), 0.71: tensor(0.0448), 0.72: tensor(0.0454), 0.73: tensor(0.0459), 0.74: tensor(0.0464), 0.75: tensor(0.0468), 0.76: tensor(0.0472), 0.77: tensor(0.0477), 0.78: tensor(0.0480), 0.79: tensor(0.0484), 0.8: tensor(0.0488), 0.81: tensor(0.0493), 0.8200000000000001: tensor(0.0497), 0.8300000000000001: tensor(0.0501), 0.84: tensor(0.0506), 0.85: tensor(0.0510), 
0.86: tensor(0.0513), 0.87: tensor(0.0516), 0.88: tensor(0.0519), 0.89: tensor(0.0521), 0.9: tensor(0.0522), 0.91: tensor(0.0524), 0.92: tensor(0.0525), 0.93: tensor(0.0526), 0.9400000000000001: tensor(0.0526), 0.9500000000000001: tensor(0.0526), 0.96: tensor(0.0526), 0.97: tensor(0.0526), 0.98: tensor(0.0525), 0.99: tensor(0.0525)}
|
| 348 |
+
|
| 349 |
+
'''
|
| 350 |
+
|
| 351 |
+
|
| 352 |
+
# for (noise, lpips), (noise_2, snr) in zip(mean_lpips_dict.items(), mean_snr_dict.items()):
|
| 353 |
+
# print(noise, snr)
|
| 354 |
+
|
| 355 |
+
|
| 356 |
+
#So here we want to print out our x, which is the mean_snr, and our y, which is the mean noise
|
| 357 |
+
|
| 358 |
+
|
| 359 |
+
# images.append((valid_reconstructed_images*255).astype(np.uint8))
|
| 360 |
+
|
| 361 |
+
if __name__ == '__main__':
|
| 362 |
+
app.run(main)
|
f16c16/train.py
ADDED
|
@@ -0,0 +1,676 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
try: # For debugging
|
| 2 |
+
from localutils.debugger import enable_debug
|
| 3 |
+
enable_debug()
|
| 4 |
+
except ImportError:
|
| 5 |
+
pass
|
| 6 |
+
|
| 7 |
+
import flax.linen as nn
|
| 8 |
+
import jax.numpy as jnp
|
| 9 |
+
from absl import app, flags
|
| 10 |
+
from functools import partial
|
| 11 |
+
import numpy as np
|
| 12 |
+
import tqdm
|
| 13 |
+
import jax
|
| 14 |
+
import jax.numpy as jnp
|
| 15 |
+
import flax
|
| 16 |
+
import optax
|
| 17 |
+
import wandb
|
| 18 |
+
from ml_collections import config_flags
|
| 19 |
+
import ml_collections
|
| 20 |
+
import tensorflow_datasets as tfds
|
| 21 |
+
import tensorflow as tf
|
| 22 |
+
tf.config.set_visible_devices([], "GPU")
|
| 23 |
+
tf.config.set_visible_devices([], "TPU")
|
| 24 |
+
import matplotlib.pyplot as plt
|
| 25 |
+
from typing import Any
|
| 26 |
+
import os
|
| 27 |
+
|
| 28 |
+
from utils.wandb import setup_wandb, default_wandb_config
|
| 29 |
+
from utils.train_state import TrainState, target_update
|
| 30 |
+
from utils.checkpoint import Checkpoint
|
| 31 |
+
from utils.pretrained_resnet import get_pretrained_embs, get_pretrained_model
|
| 32 |
+
from utils.fid import get_fid_network, fid_from_stats
|
| 33 |
+
from models.vqvae import VQVAE
|
| 34 |
+
from models.discriminator import Discriminator
|
| 35 |
+
|
| 36 |
+
FLAGS = flags.FLAGS
|
| 37 |
+
flags.DEFINE_string('dataset_name', 'imagenet256', 'Environment name.')
|
| 38 |
+
flags.DEFINE_string('save_dir', "/home/lambda/jax-vqvae-vqgan/chkpts/checkpoint", 'Save dir (if not None, save params).')
|
| 39 |
+
flags.DEFINE_string('load_dir', "./checkpointbest.tmp.tmp" , 'Load dir (if not None, load params from here).')
|
| 40 |
+
flags.DEFINE_integer('seed', 0, 'Random seed.')
|
| 41 |
+
flags.DEFINE_integer('log_interval', 1000, 'Logging interval.')
|
| 42 |
+
flags.DEFINE_integer('eval_interval', 1000, 'Eval interval.')
|
| 43 |
+
flags.DEFINE_integer('save_interval', 1000, 'Save interval.')
|
| 44 |
+
flags.DEFINE_integer('batch_size', 64, 'Total Batch size.')
|
| 45 |
+
flags.DEFINE_integer('max_steps', int(1_000_000), 'Number of training steps.')
|
| 46 |
+
|
| 47 |
+
model_config = ml_collections.ConfigDict({
|
| 48 |
+
# VQVAE
|
| 49 |
+
'lr': 0.0001,
|
| 50 |
+
'beta1': 0.0,#.5
|
| 51 |
+
'beta2': 0.99,#.9
|
| 52 |
+
'lr_warmup_steps': 4000,
|
| 53 |
+
'lr_decay_steps': 1_000_000, #They use 'lambdalr'
|
| 54 |
+
'filters': 128,
|
| 55 |
+
'num_res_blocks': 2,
|
| 56 |
+
'channel_multipliers': (1, 1, 2, 2, 4),
|
| 57 |
+
'embedding_dim': 16,
|
| 58 |
+
'norm_type': 'GN',
|
| 59 |
+
'weight_decay': 0.05,#None maybe?
|
| 60 |
+
'clip_gradient': 1.0,
|
| 61 |
+
'l2_loss_weight': 1.0,#They use L1 actually
|
| 62 |
+
'eps_update_rate': 0.9999,
|
| 63 |
+
# Quantizer
|
| 64 |
+
'quantizer_type': 'ae', # or 'fsq', 'kl'
|
| 65 |
+
# Quantizer (VQ)
|
| 66 |
+
'quantizer_loss_ratio': 1,
|
| 67 |
+
'codebook_size': 1024,
|
| 68 |
+
'entropy_loss_ratio': 0.1,
|
| 69 |
+
'entropy_loss_type': 'softmax',
|
| 70 |
+
'entropy_temperature': 0.01,
|
| 71 |
+
'commitment_cost': 0.25,
|
| 72 |
+
# Quantizer (FSQ)
|
| 73 |
+
'fsq_levels': 5, # Bins per dimension.
|
| 74 |
+
# Quantizer (KL)
|
| 75 |
+
'kl_weight': 0.000001,#They use 1e-6 on their stuff LUL. .001 is the default
|
| 76 |
+
# GAN
|
| 77 |
+
'g_adversarial_loss_weight': 0.5,
|
| 78 |
+
'g_grad_penalty_cost': 10,
|
| 79 |
+
'perceptual_loss_weight': 0.5,
|
| 80 |
+
'gan_warmup_steps': 100000,#50000, #Temporary extra time
|
| 81 |
+
"pl_decay": 0.01,
|
| 82 |
+
"pl_weight": -1,
|
| 83 |
+
'MMD_weight': 1.0
|
| 84 |
+
|
| 85 |
+
})
|
| 86 |
+
|
| 87 |
+
wandb_config = default_wandb_config()
|
| 88 |
+
wandb_config.update({
|
| 89 |
+
'project': 'vqvae',
|
| 90 |
+
'name': 'vqvae_{dataset_name}',
|
| 91 |
+
})
|
| 92 |
+
|
| 93 |
+
config_flags.DEFINE_config_dict('wandb', wandb_config, lock_config=False)
|
| 94 |
+
config_flags.DEFINE_config_dict('model', model_config, lock_config=False)
|
| 95 |
+
|
| 96 |
+
##############################################
|
| 97 |
+
## Model Definitions.
|
| 98 |
+
##############################################
|
| 99 |
+
|
| 100 |
+
@jax.vmap
|
| 101 |
+
def sigmoid_cross_entropy_with_logits(*, labels: jnp.ndarray, logits: jnp.ndarray) -> jnp.ndarray:
|
| 102 |
+
"""https://github.com/google-research/maskgit/blob/main/maskgit/libml/losses.py
|
| 103 |
+
"""
|
| 104 |
+
zeros = jnp.zeros_like(logits, dtype=logits.dtype)
|
| 105 |
+
condition = (logits >= zeros)
|
| 106 |
+
relu_logits = jnp.where(condition, logits, zeros)
|
| 107 |
+
neg_abs_logits = jnp.where(condition, -logits, logits)
|
| 108 |
+
return relu_logits - logits * labels + jnp.log1p(jnp.exp(neg_abs_logits))
|
| 109 |
+
|
| 110 |
+
class VQGANModel(flax.struct.PyTreeNode):
|
| 111 |
+
rng: Any
|
| 112 |
+
config: dict = flax.struct.field(pytree_node=False)
|
| 113 |
+
vqvae: TrainState
|
| 114 |
+
vqvae_eps: TrainState
|
| 115 |
+
discriminator: TrainState
|
| 116 |
+
|
| 117 |
+
# Train G and D.
|
| 118 |
+
@partial(jax.pmap, axis_name='data', in_axes=(0, 0))
|
| 119 |
+
def update(self, images, pmap_axis='data'):
|
| 120 |
+
new_rng, curr_key = jax.random.split(self.rng, 2)
|
| 121 |
+
|
| 122 |
+
resnet, resnet_params = get_pretrained_model('resnet50', 'data/resnet_pretrained.npy')
|
| 123 |
+
|
| 124 |
+
is_gan_training = 1.0 - (self.vqvae.step < self.config['gan_warmup_steps']).astype(jnp.float32)
|
| 125 |
+
#Maybe only start GAN way later on?
|
| 126 |
+
|
| 127 |
+
def loss_fn(params_vqvae, params_disc):
|
| 128 |
+
|
| 129 |
+
def path_reg_loss(latents, targets):#let's have pl_mean be in our self.config
|
| 130 |
+
#1/2 should be our spatial dimensions.
|
| 131 |
+
|
| 132 |
+
latents = latents[0:2, :, :, :]
|
| 133 |
+
targets = targets[0:2, :, :, :]
|
| 134 |
+
pl_noise = jax.random.normal(new_rng, shape = targets.shape) / jnp.sqrt(targets.shape[1] * targets.shape[2])
|
| 135 |
+
def grad_sum(latents, pl_noise):#So we don't have access to the actual decode method
|
| 136 |
+
#return jnp.sum(self.vqvae.decode(latents))
|
| 137 |
+
|
| 138 |
+
#I am not sure if this makes any sense whatsoever tbh
|
| 139 |
+
my_sum = self.vqvae(latents, params=params_vqvae, method="decode", rngs={'noise': curr_key})*pl_noise
|
| 140 |
+
print("Decode shape", my_sum.shape)
|
| 141 |
+
return jnp.sum(my_sum)
|
| 142 |
+
|
| 143 |
+
decode_grad_fn = jax.grad(grad_sum)
|
| 144 |
+
pl_grads = decode_grad_fn(latents, pl_noise)
|
| 145 |
+
pl_lengths = jnp.sqrt(jnp.mean(jnp.sum(jnp.square(pl_grads), axis = [2,3]), axis = 1))
|
| 146 |
+
#pl_lengths = jnp.sqrt(jnp.mean(jnp.sum(jnp.square(pl_grads), axis=2), axis=3))
|
| 147 |
+
|
| 148 |
+
pl_mean = self.vqvae.pl_mean + self.config.pl_decay * (jnp.mean(pl_lengths) - self.vqvae.pl_mean)
|
| 149 |
+
pl_penalty = jnp.square(pl_lengths - pl_mean)
|
| 150 |
+
loss = jnp.mean(pl_penalty)
|
| 151 |
+
return loss, pl_mean
|
| 152 |
+
|
| 153 |
+
if self.config.pl_weight != -1:
|
| 154 |
+
smooth_loss, pl_mean = path_reg_loss(result_dict["latents"], reconstructed_images)
|
| 155 |
+
# self.vqvae.replace(pl_mean = pl_mean)
|
| 156 |
+
#We need to update pl mean in self.vqvae
|
| 157 |
+
|
| 158 |
+
# Reconstruct image
|
| 159 |
+
reconstructed_images, result_dict = self.vqvae(images, params=params_vqvae, rngs={'noise': curr_key})
|
| 160 |
+
print("Reconstructed images shape", reconstructed_images.shape)
|
| 161 |
+
print("Input images shape", images.shape)
|
| 162 |
+
assert reconstructed_images.shape == images.shape
|
| 163 |
+
|
| 164 |
+
# GAN loss on VQVAE output.
|
| 165 |
+
discriminator_fn = lambda x: self.discriminator(x, params=params_disc)
|
| 166 |
+
real_logit, vjp_fn = jax.vjp(discriminator_fn, images, has_aux=False)
|
| 167 |
+
gradient = vjp_fn(jnp.ones_like(real_logit))[0] # Gradient of discriminator output wrt. real images.
|
| 168 |
+
gradient = gradient.reshape((images.shape[0], -1))
|
| 169 |
+
gradient = jnp.asarray(gradient, jnp.float32)
|
| 170 |
+
penalty = jnp.sum(jnp.square(gradient), axis=-1)
|
| 171 |
+
penalty = jnp.mean(penalty) # Gradient penalty for training D.
|
| 172 |
+
fake_logit = discriminator_fn(reconstructed_images)
|
| 173 |
+
d_loss_real = sigmoid_cross_entropy_with_logits(labels=jnp.ones_like(real_logit), logits=real_logit).mean()
|
| 174 |
+
d_loss_fake = sigmoid_cross_entropy_with_logits(labels=jnp.zeros_like(fake_logit), logits=fake_logit).mean()
|
| 175 |
+
loss_d = d_loss_real + d_loss_fake + (penalty * self.config['g_grad_penalty_cost'])
|
| 176 |
+
|
| 177 |
+
d_loss_for_vae = sigmoid_cross_entropy_with_logits(labels=jnp.ones_like(fake_logit), logits=fake_logit).mean()
|
| 178 |
+
d_loss_for_vae = d_loss_for_vae * is_gan_training
|
| 179 |
+
|
| 180 |
+
real_pools, _ = get_pretrained_embs(resnet_params, resnet, images=images)
|
| 181 |
+
fake_pools, _ = get_pretrained_embs(resnet_params, resnet, images=reconstructed_images)
|
| 182 |
+
perceptual_loss = jnp.mean((real_pools - fake_pools)**2)
|
| 183 |
+
|
| 184 |
+
l2_loss = jnp.mean((reconstructed_images - images) ** 2)
|
| 185 |
+
quantizer_loss = result_dict['quantizer_loss'] if 'quantizer_loss' in result_dict else 0.0
|
| 186 |
+
if self.config['quantizer_type'] == 'kl' or self.config["quantizer_type"] == "kl_two":
|
| 187 |
+
quantizer_loss = quantizer_loss * self.config['kl_weight']
|
| 188 |
+
elif self.config["quantizer_type"] == "MMD":
|
| 189 |
+
quantizer_loss = quantizer_loss * self.config['MMD_weight']
|
| 190 |
+
loss_vae = (l2_loss * FLAGS.model['l2_loss_weight']) \
|
| 191 |
+
+ (quantizer_loss * FLAGS.model['quantizer_loss_ratio']) \
|
| 192 |
+
+ (d_loss_for_vae * FLAGS.model['g_adversarial_loss_weight']) \
|
| 193 |
+
+ (perceptual_loss * FLAGS.model['perceptual_loss_weight']) \
|
| 194 |
+
#+ (smooth_loss * FLAGS.model['pl_weight'] )
|
| 195 |
+
codebook_usage = result_dict['usage'] if 'usage' in result_dict else 0.0
|
| 196 |
+
|
| 197 |
+
return_dict = {
|
| 198 |
+
'loss_vae': loss_vae,
|
| 199 |
+
'loss_d': loss_d,
|
| 200 |
+
'l2_loss': l2_loss,
|
| 201 |
+
'd_loss_for_vae': d_loss_for_vae,
|
| 202 |
+
'perceptual_loss': perceptual_loss,
|
| 203 |
+
'quantizer_loss': quantizer_loss,
|
| 204 |
+
'codebook_usage': codebook_usage,
|
| 205 |
+
#'pl_loss': smooth_loss,
|
| 206 |
+
}
|
| 207 |
+
|
| 208 |
+
if self.config["pl_weight"] != -1:
|
| 209 |
+
loss_vae += (smooth_loss * FLAGS.model["pl_weight"])
|
| 210 |
+
return_dict["pl_mean"] = pl_mean
|
| 211 |
+
return_dict["smooth_loss"] = smooth_loss
|
| 212 |
+
|
| 213 |
+
|
| 214 |
+
return (loss_vae, loss_d), return_dict
|
| 215 |
+
|
| 216 |
+
|
| 217 |
+
# This is a fancy way to do 'jax.grad' so (loss_vae, params_vqvae) and (loss_d, params_disc) are differentiated.
|
| 218 |
+
_, grad_fn, info = jax.vjp(loss_fn, self.vqvae.params, self.discriminator.params, has_aux=True)
|
| 219 |
+
vae_grads, _ = grad_fn((1., 0.))
|
| 220 |
+
_, d_grads = grad_fn((0., 1.))
|
| 221 |
+
|
| 222 |
+
vae_grads = jax.lax.pmean(vae_grads, axis_name=pmap_axis)
|
| 223 |
+
d_grads = jax.lax.pmean(d_grads, axis_name=pmap_axis)
|
| 224 |
+
d_grads = jax.tree.map(lambda x: x * is_gan_training, d_grads)
|
| 225 |
+
|
| 226 |
+
info = jax.lax.pmean(info, axis_name=pmap_axis)
|
| 227 |
+
if self.config['quantizer_type'] == 'fsq':
|
| 228 |
+
info['codebook_usage'] = jnp.sum(info['codebook_usage'] > 0) / info['codebook_usage'].shape[-1]
|
| 229 |
+
|
| 230 |
+
updates, new_opt_state = self.vqvae.tx.update(vae_grads, self.vqvae.opt_state, self.vqvae.params)
|
| 231 |
+
new_params = optax.apply_updates(self.vqvae.params, updates)
|
| 232 |
+
|
| 233 |
+
if self.config["pl_weight"] != -1:
|
| 234 |
+
new_vqvae = self.vqvae.replace(step=self.vqvae.step + 1, params=new_params, opt_state=new_opt_state, pl_mean=info["pl_mean"])
|
| 235 |
+
else:
|
| 236 |
+
new_vqvae = self.vqvae.replace(step=self.vqvae.step + 1, params=new_params, opt_state=new_opt_state)
|
| 237 |
+
|
| 238 |
+
updates, new_opt_state = self.discriminator.tx.update(d_grads, self.discriminator.opt_state, self.discriminator.params)
|
| 239 |
+
new_params = optax.apply_updates(self.discriminator.params, updates)
|
| 240 |
+
new_discriminator = self.discriminator.replace(step=self.discriminator.step + 1, params=new_params, opt_state=new_opt_state)
|
| 241 |
+
|
| 242 |
+
info['grad_norm_vae'] = optax.global_norm(vae_grads)
|
| 243 |
+
info['grad_norm_d'] = optax.global_norm(d_grads)
|
| 244 |
+
info['update_norm'] = optax.global_norm(updates)
|
| 245 |
+
info['param_norm'] = optax.global_norm(new_params)
|
| 246 |
+
info['is_gan_training'] = is_gan_training
|
| 247 |
+
|
| 248 |
+
new_vqvae_eps = target_update(new_vqvae, self.vqvae_eps, 1-self.config['eps_update_rate'])
|
| 249 |
+
|
| 250 |
+
new_model = self.replace(rng=new_rng, vqvae=new_vqvae, vqvae_eps=new_vqvae_eps, discriminator=new_discriminator)
|
| 251 |
+
return new_model, info
|
| 252 |
+
|
| 253 |
+
@partial(jax.pmap, axis_name='data', in_axes=(0, 0))
|
| 254 |
+
def reconstruction(self, images, pmap_axis='data', sampling = True):
|
| 255 |
+
if not sampling:
|
| 256 |
+
reconstructed_images, _ = self.vqvae_eps(images)
|
| 257 |
+
else:#Not sure what our theoretical sampling mode does
|
| 258 |
+
new_rng, curr_key = jax.random.split(self.rng, 2)
|
| 259 |
+
reconstructed_images, _ = self.vqvae_eps(images, rngs={'noise': curr_key})
|
| 260 |
+
|
| 261 |
+
reconstructed_images = jnp.clip(reconstructed_images, 0, 1)
|
| 262 |
+
return reconstructed_images
|
| 263 |
+
|
| 264 |
+
@partial(jax.pmap, axis_name='data', in_axes=(0, 0))
|
| 265 |
+
def reconstruction_sampling(self, images, pmap_axis='data'):
|
| 266 |
+
|
| 267 |
+
reconstructed_images_determistic, _ = self.vqvae_eps(images)
|
| 268 |
+
|
| 269 |
+
|
| 270 |
+
new_rng, curr_key = jax.random.split(self.rng, 2)
|
| 271 |
+
reconstructed_images_sample, result_dict = self.vqvae(images, rngs={'noise': curr_key})
|
| 272 |
+
|
| 273 |
+
#We don't need to return the result dict.
|
| 274 |
+
reconstructed_images_determistic = jnp.clip(reconstructed_images_determistic, 0, 1)
|
| 275 |
+
reconstructed_images_sample = jnp.clip(reconstructed_images_sample, 0, 1)
|
| 276 |
+
|
| 277 |
+
return reconstructed_images_determistic, reconstructed_images_sample
|
| 278 |
+
|
| 279 |
+
@partial(jax.pmap, axis_name='data', in_axes=(0, 0))
|
| 280 |
+
def reconstruction_interpolation(self, images, pmap_axis='data'):
|
| 281 |
+
|
| 282 |
+
#So we *have* our two images. We are going to linearly interpolate between them in... latent space
|
| 283 |
+
#But also in image space?
|
| 284 |
+
#Sure, why not
|
| 285 |
+
reconstructed_images_determistic, _ = self.vqvae_eps(images)
|
| 286 |
+
|
| 287 |
+
|
| 288 |
+
new_rng, curr_key = jax.random.split(self.rng, 2)
|
| 289 |
+
reconstructed_images_sample, result_dict = self.vqvae(images, rngs={'noise': curr_key})
|
| 290 |
+
|
| 291 |
+
#We don't need to return the result dict.
|
| 292 |
+
reconstructed_images_determistic = jnp.clip(reconstructed_images_determistic, 0, 1)
|
| 293 |
+
reconstructed_images_sample = jnp.clip(reconstructed_images_sample, 0, 1)
|
| 294 |
+
|
| 295 |
+
return reconstructed_images_determistic, reconstructed_images_sample
|
| 296 |
+
|
| 297 |
+
@partial(jax.pmap, axis_name='data', in_axes=(0, 0))
|
| 298 |
+
def get_latent(self, images, pmap_axis='data'):
|
| 299 |
+
|
| 300 |
+
#We do *not* add the noise ourselves, just save it.
|
| 301 |
+
latents, result_dict = self.vqvae_eps(images, params=self.vqvae_eps.params, method="encode")
|
| 302 |
+
|
| 303 |
+
# reconstructed_images, result_dict_two = self.vqvae_eps(images)
|
| 304 |
+
# reconstructed_images = jnp.clip(reconstructed_images, 0, 1)
|
| 305 |
+
#
|
| 306 |
+
#
|
| 307 |
+
# decoded = self.vqvae_eps(latents, params=self.vqvae_eps.params, method="decode")
|
| 308 |
+
# decoded = jnp.clip(decoded, 0, 1)
|
| 309 |
+
|
| 310 |
+
#reconstructed images should be correct
|
| 311 |
+
return latents, result_dict#, result_dict_two, reconstructed_images, decoded
|
| 312 |
+
|
| 313 |
+
|
| 314 |
+
@partial(jax.pmap, axis_name='data', in_axes=(0, 0))
|
| 315 |
+
def reconstruction_noisy(self, images, pmap_axis='data'):
|
| 316 |
+
|
| 317 |
+
|
| 318 |
+
noises = []
|
| 319 |
+
numbers = np.arange(0.00, 1.0, 0.01)
|
| 320 |
+
|
| 321 |
+
for number in numbers:
|
| 322 |
+
noises.append(float(number))
|
| 323 |
+
|
| 324 |
+
|
| 325 |
+
#So 3 things to try out.
|
| 326 |
+
#One is normalize variance of the latents before adding noise, start there
|
| 327 |
+
#The second is plot snr instead.
|
| 328 |
+
#snr = var(latent)/var(noise)
|
| 329 |
+
#var is std^2
|
| 330 |
+
|
| 331 |
+
|
| 332 |
+
#This return the full reconstruction, but *also* the latents.
|
| 333 |
+
reconstructed_images, result_dict = self.vqvae_eps(images)
|
| 334 |
+
latents = result_dict["latents"]
|
| 335 |
+
std = result_dict["std"]
|
| 336 |
+
#We need to check the latnes std
|
| 337 |
+
|
| 338 |
+
#Get rng for creating noise.
|
| 339 |
+
new_rng, curr_key = jax.random.split(self.rng, 2)
|
| 340 |
+
|
| 341 |
+
decode = []
|
| 342 |
+
latent_std = latents.std(axis = [1,2,3]).reshape(-1,1,1,1)
|
| 343 |
+
|
| 344 |
+
for mult in noises:
|
| 345 |
+
|
| 346 |
+
noise = jax.random.normal(curr_key, latents.shape)
|
| 347 |
+
#Combine noise with latents
|
| 348 |
+
|
| 349 |
+
|
| 350 |
+
if True:
|
| 351 |
+
latent_var = latent_std ** 2
|
| 352 |
+
noise_std = mult*noise.std()#noise std should be around 1
|
| 353 |
+
noise_var = mult ** 2
|
| 354 |
+
if noise_var == 0:#If noise is zero, then instead denominator is it's variance
|
| 355 |
+
snr = 0
|
| 356 |
+
else:
|
| 357 |
+
snr = latent_var/noise_var
|
| 358 |
+
|
| 359 |
+
temp_latents = latents + noise*mult
|
| 360 |
+
|
| 361 |
+
#vae_eps is the determinstic one.
|
| 362 |
+
decoded = self.vqvae_eps(temp_latents, params=self.vqvae_eps.params, method="decode")
|
| 363 |
+
decoded = jnp.clip(decoded, 0, 1)
|
| 364 |
+
if True:
|
| 365 |
+
decode.append((decoded, snr))
|
| 366 |
+
|
| 367 |
+
reconstructed_images = jnp.clip(reconstructed_images, 0, 1)
|
| 368 |
+
return reconstructed_images, decode, std
|
| 369 |
+
|
| 370 |
+
|
| 371 |
+
@partial(jax.pmap, axis_name='data', in_axes=(0, 0))
|
| 372 |
+
def reconstruction_ppl(self, images, pmap_axis='data'):
|
| 373 |
+
|
| 374 |
+
epsilon = .0001
|
| 375 |
+
reconstructed_images, result_dict = self.vqvae_eps(images)
|
| 376 |
+
latents = result_dict["latents"]
|
| 377 |
+
std = result_dict["std"]
|
| 378 |
+
|
| 379 |
+
new_rng, curr_key = jax.random.split(self.rng, 2)
|
| 380 |
+
|
| 381 |
+
noise = jax.random.normal(curr_key, latents.shape)
|
| 382 |
+
#Combine noise with latents
|
| 383 |
+
|
| 384 |
+
temp_latents = latents + noise * epsilon
|
| 385 |
+
# print(temp_latents.shape)#Probably should be like, bs, 32,32,4
|
| 386 |
+
# exit()
|
| 387 |
+
decoded = self.vqvae_eps(temp_latents, params=self.vqvae_eps.params, method="decode")
|
| 388 |
+
decoded = jnp.clip(decoded, 0, 1)
|
| 389 |
+
|
| 390 |
+
reconstructed_images = jnp.clip(reconstructed_images, 0, 1)
|
| 391 |
+
return reconstructed_images, decoded, std, latents
|
| 392 |
+
|
| 393 |
+
|
| 394 |
+
#So this method simply will return the gradient/jacobian
|
| 395 |
+
@partial(jax.pmap, axis_name='data', in_axes=(0, 0))
|
| 396 |
+
def reconstruction_grad_distance(self, images, pmap_axis='data'):
|
| 397 |
+
#We want to try and identify C.
|
| 398 |
+
#C means that when we change our latents by a specific and small number X, our outputs change by C*X also.
|
| 399 |
+
#We want to capture all of the C, and see what their STD is.
|
| 400 |
+
pass
|
| 401 |
+
|
| 402 |
+
|
| 403 |
+
@partial(jax.pmap, axis_name='data', in_axes=(0, 0))
|
| 404 |
+
def reconstruction_ppl_two(self, images, pmap_axis='data'):
|
| 405 |
+
|
| 406 |
+
epsilon = .0001
|
| 407 |
+
reconstructed_images, result_dict = self.vqvae_eps(images)
|
| 408 |
+
latents = result_dict["latents"]
|
| 409 |
+
std = result_dict["std"]
|
| 410 |
+
|
| 411 |
+
new_rng, curr_key = jax.random.split(self.rng, 2)
|
| 412 |
+
|
| 413 |
+
noise = jax.random.normal(curr_key, latents.shape)
|
| 414 |
+
#Combine noise with latents
|
| 415 |
+
|
| 416 |
+
temp_latents = latents + noise/2 * epsilon
|
| 417 |
+
|
| 418 |
+
decoded = self.vqvae_eps(temp_latents, params=self.vqvae_eps.params, method="decode")
|
| 419 |
+
decoded = jnp.clip(decoded, 0, 1)
|
| 420 |
+
|
| 421 |
+
temp_latents_2 = latents + -1 * noise/2 * epsilon
|
| 422 |
+
|
| 423 |
+
decoded_2 = self.vqvae_eps(temp_latents_2, params=self.vqvae_eps.params, method="decode")
|
| 424 |
+
decoded_2 = jnp.clip(decoded_2, 0, 1)
|
| 425 |
+
|
| 426 |
+
|
| 427 |
+
reconstructed_images = jnp.clip(reconstructed_images, 0, 1)
|
| 428 |
+
return reconstructed_images, decoded, std, latents, decoded_2
|
| 429 |
+
|
| 430 |
+
@partial(jax.pmap, axis_name='data', in_axes=(0, 0))
|
| 431 |
+
def reconstruction_ppl_image(self, images, pmap_axis='data'):
|
| 432 |
+
|
| 433 |
+
epsilon = .0001
|
| 434 |
+
new_rng, curr_key = jax.random.split(self.rng, 2)
|
| 435 |
+
|
| 436 |
+
reconstructed_images, result_dict = self.vqvae_eps(images)
|
| 437 |
+
latents = result_dict["latents"]
|
| 438 |
+
std = result_dict["std"]
|
| 439 |
+
|
| 440 |
+
|
| 441 |
+
noise = jax.random.normal(curr_key, images.shape)
|
| 442 |
+
images = images + noise * epsilon
|
| 443 |
+
|
| 444 |
+
|
| 445 |
+
decoded, result_dict_2 = self.vqvae_eps(images)
|
| 446 |
+
decoded = jnp.clip(decoded, 0, 1)
|
| 447 |
+
|
| 448 |
+
latents_noisy = result_dict_2["latents"]
|
| 449 |
+
std_noisy = result_dict_2["std"]
|
| 450 |
+
|
| 451 |
+
reconstructed_images = jnp.clip(reconstructed_images, 0, 1)
|
| 452 |
+
return reconstructed_images, decoded, std, latents, std_noisy, latents_noisy
|
| 453 |
+
|
| 454 |
+
##############################################
|
| 455 |
+
## Training Code.
|
| 456 |
+
##############################################
|
| 457 |
+
def main(_):
    """Train the VQGAN autoencoder and periodically evaluate reconstruction FID.

    Driven entirely by absl FLAGS (dataset name, model config, wandb setup,
    checkpoint directories). Designed for multi-host, multi-device JAX
    execution: data is sharded per host, the model is pmap-replicated, and
    logging/checkpointing happen only on process 0.
    """
    np.random.seed(FLAGS.seed)
    print("Using devices", jax.local_devices())
    device_count = len(jax.local_devices())
    global_device_count = jax.device_count()
    # Per-host batch: the global batch is divided evenly across hosts.
    local_batch_size = FLAGS.batch_size // (global_device_count // device_count)
    print("Device count", device_count)
    print("Global device count", global_device_count)
    print("Global Batch: ", FLAGS.batch_size)
    print("Node Batch: ", local_batch_size)
    print("Device Batch:", local_batch_size // device_count)

    # Create the wandb logger only on the first host.
    if jax.process_index() == 0:
        setup_wandb(FLAGS.model.to_dict(), **FLAGS.wandb)

    def get_dataset(is_train):
        # Build an infinite, host-sharded iterator of float32 images in [0, 1].
        if 'imagenet' in FLAGS.dataset_name:
            def deserialization_fn(data):
                image = data['image']
                # Center-crop to a square before resizing to the target size.
                min_side = tf.minimum(tf.shape(image)[0], tf.shape(image)[1])
                image = tf.image.resize_with_crop_or_pad(image, min_side, min_side)
                if 'imagenet256' in FLAGS.dataset_name:
                    image = tf.image.resize(image, (256, 256))
                elif 'imagenet128' in FLAGS.dataset_name:
                    image = tf.image.resize(image, (128, 128))
                else:
                    raise ValueError(f"Unknown dataset {FLAGS.dataset_name}")
                if is_train:
                    image = tf.image.random_flip_left_right(image)
                image = tf.cast(image, tf.float32) / 255.0
                return image

            split = tfds.split_for_jax_process('train' if is_train else 'validation', drop_remainder=True)
            print(split)
            dataset = tfds.load('imagenet2012', split=split, data_dir="/dev/shm")
            dataset = dataset.map(deserialization_fn, num_parallel_calls=tf.data.AUTOTUNE)
            dataset = dataset.shuffle(10000, seed=42, reshuffle_each_iteration=True)
            dataset = dataset.repeat()
            dataset = dataset.batch(local_batch_size)
            dataset = dataset.prefetch(tf.data.AUTOTUNE)
            dataset = tfds.as_numpy(dataset)
            dataset = iter(dataset)
            return dataset
        else:
            raise ValueError(f"Unknown dataset {FLAGS.dataset_name}")

    dataset = get_dataset(is_train=True)
    dataset_valid = get_dataset(is_train=False)
    example_obs = next(dataset)[:1]

    get_fid_activations = get_fid_network()
    if not os.path.exists('./data/imagenet256_fidstats_openai.npz'):
        raise ValueError("Please download the FID stats file! See the README.")
    truth_fid_stats = np.load('data/imagenet256_fidstats_openai.npz')

    rng = jax.random.PRNGKey(FLAGS.seed)
    rng, param_key = jax.random.split(rng)
    print("Total Memory on device:", float(jax.local_devices()[0].memory_stats()['bytes_limit']) / 1024**3, "GB")

    ###################################
    # Creating Model and put on devices.
    ###################################
    # Infer image geometry from a real batch rather than hard-coding it.
    FLAGS.model.image_channels = example_obs.shape[-1]
    FLAGS.model.image_size = example_obs.shape[1]
    vqvae_def = VQVAE(FLAGS.model, train=True)
    vqvae_params = vqvae_def.init({'params': param_key, 'noise': param_key}, example_obs)['params']
    tx = optax.adam(learning_rate=FLAGS.model['lr'], b1=FLAGS.model['beta1'], b2=FLAGS.model['beta2'])
    vqvae_ts = TrainState.create(vqvae_def, vqvae_params, tx=tx)
    # Eval-mode twin sharing the same initial parameters (no optimizer).
    vqvae_def_eps = VQVAE(FLAGS.model, train=False)
    vqvae_eps_ts = TrainState.create(vqvae_def_eps, vqvae_params)
    print("Total num of VQVAE parameters:", sum(x.size for x in jax.tree_util.tree_leaves(vqvae_params)))

    discriminator_def = Discriminator(FLAGS.model)
    discriminator_params = discriminator_def.init(param_key, example_obs)['params']
    tx = optax.adam(learning_rate=FLAGS.model['lr'], b1=FLAGS.model['beta1'], b2=FLAGS.model['beta2'])
    discriminator_ts = TrainState.create(discriminator_def, discriminator_params, tx=tx)
    print("Total num of Discriminator parameters:", sum(x.size for x in jax.tree_util.tree_leaves(discriminator_params)))

    model = VQGANModel(rng=rng, vqvae=vqvae_ts, vqvae_eps=vqvae_eps_ts, discriminator=discriminator_ts, config=FLAGS.model)

    if FLAGS.load_dir is not None:
        try:
            cp = Checkpoint(FLAGS.load_dir)
            model = cp.load_model(model)
            print("Loaded model with step", model.vqvae.step)
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # are not swallowed; any load failure falls back to random init.
        except Exception:
            print("Random init")
    else:
        print("Random init")

    model = flax.jax_utils.replicate(model, devices=jax.local_devices())
    jax.debug.visualize_array_sharding(model.vqvae.params['decoder']['Conv_0']['bias'])

    ###################################
    # Train Loop
    ###################################

    best_fid = 100000

    for i in tqdm.tqdm(range(1, FLAGS.max_steps + 1),
                       smoothing=0.1,
                       dynamic_ncols=True):

        batch_images = next(dataset)
        # [devices, batch // devices, H, W, C] for pmap.
        batch_images = batch_images.reshape((len(jax.local_devices()), -1, *batch_images.shape[1:]))

        model, update_info = model.update(batch_images)

        if i % FLAGS.log_interval == 0:
            update_info = jax.tree.map(lambda x: x.mean(), update_info)
            train_metrics = {f'training/{k}': v for k, v in update_info.items()}
            if jax.process_index() == 0:
                wandb.log(train_metrics, step=i)

        if i % FLAGS.eval_interval == 0:
            # Qualitative reconstructions on train and validation batches.
            reconstructed_images = model.reconstruction(batch_images)  # [devices, b, H, W, C]
            valid_images = next(dataset_valid)
            valid_images = valid_images.reshape((len(jax.local_devices()), -1, *valid_images.shape[1:]))
            valid_reconstructed_images = model.reconstruction(valid_images)

            if jax.process_index() == 0:
                wandb.log({'batch_image_mean': batch_images.mean()}, step=i)
                wandb.log({'reconstructed_images_mean': reconstructed_images.mean()}, step=i)
                wandb.log({'batch_image_std': batch_images.std()}, step=i)
                wandb.log({'reconstructed_images_std': reconstructed_images.std()}, step=i)

                # Side-by-side grid: originals (top row) vs reconstructions
                # (bottom row). The grid is sized for 8 devices; only the
                # first 4 columns are filled on a 4-device host.
                fig, axs = plt.subplots(2, 8, figsize=(30, 15))
                for j in range(4):
                    axs[0, j].imshow(batch_images[j, 0], vmin=0, vmax=1)
                    axs[1, j].imshow(reconstructed_images[j, 0], vmin=0, vmax=1)
                wandb.log({'reconstruction': wandb.Image(fig)}, step=i)
                plt.close(fig)
                fig, axs = plt.subplots(2, 8, figsize=(30, 15))
                for j in range(4):
                    axs[0, j].imshow(valid_images[j, 0], vmin=0, vmax=1)
                    axs[1, j].imshow(valid_reconstructed_images[j, 0], vmin=0, vmax=1)
                wandb.log({'reconstruction_valid': wandb.Image(fig)}, step=i)
                plt.close(fig)

            # Validation losses: run an update but discard the new model state.
            # Runs on every process (collective op inside update).
            _, valid_update_info = model.update(valid_images)
            valid_update_info = jax.tree.map(lambda x: x.mean(), valid_update_info)
            valid_metrics = {f'validation/{k}': v for k, v in valid_update_info.items()}
            if jax.process_index() == 0:
                wandb.log(valid_metrics, step=i)

            # FID over ~40k validation reconstructions (780 batches).
            activations = []
            for _ in range(780):
                valid_images = next(dataset_valid)
                valid_images = valid_images.reshape((len(jax.local_devices()), -1, *valid_images.shape[1:]))
                valid_reconstructed_images = model.reconstruction(valid_images)

                # InceptionV3 expects 299x299 inputs scaled to [-1, 1].
                valid_reconstructed_images = jax.image.resize(valid_reconstructed_images, (valid_images.shape[0], valid_images.shape[1], 299, 299, 3),
                                                              method='bilinear', antialias=False)
                valid_reconstructed_images = 2 * valid_reconstructed_images - 1
                activations += [np.array(get_fid_activations(valid_reconstructed_images))[..., 0, 0, :]]

            # TODO: use all_gather so multi-host runs include every host's
            # activations; currently only this host's images contribute.
            activations = np.concatenate(activations, axis=0)
            activations = activations.reshape((-1, activations.shape[-1]))

            print("doing this much FID", activations.shape)
            mu1 = np.mean(activations, axis=0)
            sigma1 = np.cov(activations, rowvar=False)
            fid = fid_from_stats(mu1, sigma1, truth_fid_stats['mu'], truth_fid_stats['sigma'])

            if jax.process_index() == 0:
                wandb.log({'validation/fid': fid}, step=i)
                print("validation FID at step", i, fid)
                # Keep a rolling best-so-far checkpoint. Guarding on save_dir
                # prevents a TypeError when no save directory is configured.
                if fid < best_fid and FLAGS.save_dir is not None:
                    model_single = flax.jax_utils.unreplicate(model)
                    cp = Checkpoint(FLAGS.save_dir + "best.tmp")
                    cp.set_model(model_single)
                    cp.save()
                    best_fid = fid

        if (i % FLAGS.save_interval == 0) and (FLAGS.save_dir is not None):
            if jax.process_index() == 0:
                model_single = flax.jax_utils.unreplicate(model)
                cp = Checkpoint(FLAGS.save_dir)
                cp.set_model(model_single)
                cp.save()
|
| 674 |
+
|
| 675 |
+
# Script entry point: absl parses command-line FLAGS, then invokes main.
if __name__ == '__main__':
    app.run(main)
|