Spaces:

vivjay30
/

cdim

Running on Zero

App Files Files Community

VIVEK JAYARAM committed on Oct 17, 2024

Commit

95aa1d5

1 Parent(s): c63740a

KL, categorical kl, and poisson noise

Browse files

Files changed (6) hide show

cdim/diffusion/diffusion_pipeline.py +47 -8
cdim/discrete_kl_loss.py +34 -0
cdim/noise.py +15 -0
inference.py +6 -2
noise_configs/bimodal_noise_config.yaml +2 -0
noise_configs/poisson_noise_config.yaml +1 -1

cdim/diffusion/diffusion_pipeline.py CHANGED Viewed

@@ -2,6 +2,17 @@ import torch
 from tqdm import tqdm
 from cdim.image_utils import randn_tensor
 @torch.no_grad()
@@ -16,7 +27,8 @@ def run_diffusion(
         K=5,
         image_dim=256,
         image_channels=3,
-        model_type="diffusers"
     ):
     batch_size = noisy_observation.shape[0]
     image_shape = (batch_size, image_channels, image_dim, image_dim)
@@ -44,13 +56,40 @@ def run_diffusion(
                 model_output = model_output.sample if model_type == "diffusers" else model_output[:, :3]
                 x_0 = (image - beta_prod_t_prev ** (0.5) * model_output) / alpha_prod_t_prev ** (0.5)
-                distance = operator(x_0) - noisy_observation
-                if (distance ** 2).mean() < noise_function.sigma ** 2:
-                    break
-                loss = ((distance) ** 2).mean()
-                print(loss.mean())
-                loss.mean().backward()
-            image -= 15 / torch.linalg.norm(image.grad) * image.grad
     return image

 from tqdm import tqdm
 from cdim.image_utils import randn_tensor
+from cdim.discrete_kl_loss import discrete_kl_loss
def compute_kl_gaussian(residuals, sigma, eps=1e-12):
    """Return the KL divergence between a zero-mean Gaussian and the residuals' Gaussian fit.

    The residuals are summarised by their sample mean ``m`` and (biased)
    sample variance ``v``; the value returned is the closed form of
    ``KL(N(0, sigma^2) || N(m, v))``::

        log(sqrt(v) / sigma) + (sigma^2 + m^2) / (2 * v) - 1/2

    The result stays attached to the autograd graph of ``residuals`` so it
    can be used directly as a loss.

    Args:
        residuals: Tensor of residual values; all elements are pooled.
        sigma: Standard deviation of the zero-mean reference Gaussian.
            Must be strictly positive.
        eps: Lower bound applied to the sample variance so that constant
            residuals yield a large finite value instead of NaN.

    Returns:
        A 0-dim tensor holding the KL divergence.

    Raises:
        ValueError: If ``sigma`` is zero or negative, or ``residuals`` is empty.
    """
    # Only 0 centered for now
    if sigma == 0:
        raise ValueError("Can't do KL Divergence when sigma is 0")
    if sigma < 0:
        # A negative sigma would otherwise reach torch.log and give NaN.
        raise ValueError("Can't do KL Divergence when sigma is negative")
    if residuals.numel() == 0:
        # mean() of an empty tensor is NaN, which would poison the gradient step.
        raise ValueError("Can't do KL Divergence on empty residuals")

    sample_mean = residuals.mean()
    sample_var = ((residuals - sample_mean) ** 2).mean()
    # Floor the variance: with constant residuals it is exactly 0 and the
    # expression below would evaluate to -inf + inf = NaN.
    sample_var = sample_var.clamp(min=eps)

    kl_div = torch.log(sample_var**0.5 / sigma) + (sigma**2 + sample_mean**2) / (2*sample_var) - 0.5
    print(f"KL Divergence {kl_div}")
    return kl_div
 @torch.no_grad()
         K=5,
         image_dim=256,
         image_channels=3,
+        model_type="diffusers",
+        loss_type="l2"
     ):
     batch_size = noisy_observation.shape[0]
     image_shape = (batch_size, image_channels, image_dim, image_dim)
                 model_output = model_output.sample if model_type == "diffusers" else model_output[:, :3]
                 x_0 = (image - beta_prod_t_prev ** (0.5) * model_output) / alpha_prod_t_prev ** (0.5)
+                if loss_type == "l2" and noise_function.name == "gaussian":
+                    distance = operator(x_0) - noisy_observation
+                    if (distance ** 2).mean() < noise_function.sigma ** 2:
+                        break
+                    loss = ((distance) ** 2).mean()
+                    print(f"L2 loss {loss}")
+                    loss.backward()
+                elif loss_type == "kl" and noise_function.name == "gaussian":
+                    diff = (operator(x_0) - noisy_observation)  # Residuals
+                    kl_div = compute_kl_gaussian(diff, noise_function.sigma)
+                    kl_div.backward()
+                elif loss_type == "kl" and noise_function.name == "poisson":
+                    residuals = (operator(x_0) * noise_function.rate - noisy_observation * noise_function.rate) * 127.5  # Residuals
+                    x_0_pixel = operator((x_0 + 1) * 127.5)
+                    mask = x_0_pixel > 2 # Avoid numeric issues with pixel values near 0
+                    pearson = residuals[mask] / torch.sqrt(x_0_pixel[mask] * noise_function.rate)
+                    pearson_flat = pearson.view(-1)
+                    kl_div = compute_kl_gaussian(pearson_flat, 1.0)
+                    kl_div.backward()
+                elif loss_type == "categorical_kl" and noise_function.name == "bimodal":
+                    diff = (operator(x_0) - noisy_observation)
+                    indices = operator(torch.ones(image.shape).to(device))
+                    diff = diff[indices > 0]  # Don't consider masked out pixels in the distribution
+                    empirical_distribution = noise_function.sample_noise_distribution(image).to(device).view(-1)
+                    loss = discrete_kl_loss(diff, empirical_distribution, num_bins=15)
+                    print(f"Categorical KL {loss}")
+                    loss.backward()
+                else:
+                    raise ValueError(f"Unsupported combination: loss {loss_type} noise {noise_function.name}")
+            image -= 5 / torch.linalg.norm(image.grad) * image.grad
     return image

cdim/discrete_kl_loss.py ADDED Viewed

	@@ -0,0 +1,34 @@

+import torch
+import torch.nn as nn
+import torch.nn.functional as F
def discrete_kl_loss(pred, target, num_bins=20, epsilon=1e-8):
    """Differentiable KL divergence between soft histograms of two sample sets.

    Both inputs are binned over their shared value range with a triangular
    (linear-interpolation) kernel centred on each bin, so the histogram of
    ``pred`` remains differentiable with respect to ``pred``. The histograms
    are smoothed with ``epsilon``, normalised, and compared with
    ``F.kl_div(log(pred_probs), target_probs)``, i.e. KL(target || pred).

    Samples are clamped to the outermost bin centres before weighting, so
    every sample (including those exactly at the range minimum or maximum)
    contributes a total weight of one.

    Args:
        pred: Flattened 1-D tensor of predicted samples.
        target: Flattened 1-D tensor of reference samples, on the same
            device as ``pred``.
        num_bins: Number of histogram bins (at least 1).
        epsilon: Additive smoothing applied to every bin before normalising.

    Returns:
        A 0-dim tensor holding the summed KL divergence.

    Raises:
        ValueError: If ``num_bins`` is smaller than 1 or either input is empty.
    """
    if num_bins < 1:
        raise ValueError("num_bins must be at least 1")
    if pred.numel() == 0 or target.numel() == 0:
        # An empty sample set would give a 0/0 histogram and a NaN loss.
        raise ValueError("discrete_kl_loss needs non-empty pred and target")

    # Determine range for binning
    with torch.no_grad():
        combined = torch.cat([pred, target])
        min_val = combined.min().item()
        max_val = combined.max().item()
    if not max_val > min_val:
        # All samples identical: widen the range so bins have non-zero width.
        max_val = min_val + 1.0

    # Create bin edges and the centres the kernel is anchored on
    bin_edges = torch.linspace(min_val, max_val, num_bins + 1, device=pred.device)
    bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2
    bin_width = (max_val - min_val) / num_bins
    lowest_center = min_val + bin_width / 2
    highest_center = max_val - bin_width / 2

    # Compute soft histogram
    def soft_histogram(x):
        # Samples in the outer half of the first/last bin are pulled onto the
        # nearest centre so they count fully instead of being under-weighted.
        x_clamped = x.clamp(min=lowest_center, max=highest_center)
        deltas = torch.abs(x_clamped.unsqueeze(-1) - bin_centers.unsqueeze(0))
        weights = torch.clamp(1 - deltas / bin_width, min=0, max=1)
        return weights.sum(dim=0) / len(x)

    pred_hist = soft_histogram(pred)
    target_hist = soft_histogram(target)

    # Add epsilon and normalize
    pred_probs = (pred_hist + epsilon) / (pred_hist.sum() + num_bins * epsilon)
    target_probs = (target_hist + epsilon) / (target_hist.sum() + num_bins * epsilon)

    # Compute KL divergence
    kl_div = F.kl_div(pred_probs.log(), target_probs, reduction='sum')
    return kl_div

cdim/noise.py CHANGED Viewed

@@ -58,3 +58,18 @@ class PoissonNoise(Noise):
         data = data * 2.0 - 1.0
         data = data.clamp(-1, 1)
         return data.to(device)

         data = data * 2.0 - 1.0
         data = data.clamp(-1, 1)
         return data.to(device)
@register_noise(name='bimodal')
class BimodalNoise(Noise):
    """Two-point noise: every element is shifted by either +value or -value."""

    def __init__(self, value):
        # Name under which this noise type is identified by callers.
        self.name = 'bimodal'
        # Magnitude of the shift applied to each element.
        self.value = value

    def sample_noise_distribution(self, data):
        """Draw a noise tensor shaped like ``data`` with entries in {-value, +value}.

        ``torch.randint`` yields 0 or 1 per element, which is mapped to a
        sign of -1 or +1 and then scaled by ``self.value``. No device is
        passed to ``torch.randint``, so the sample is not moved to
        ``data``'s device here.
        """
        signs = 2 * torch.randint(low=0, high=2, size=data.shape) - 1
        return signs * self.value

    def __call__(self, data):
        """Return ``data`` plus a fresh noise sample moved to ``data``'s device."""
        perturbation = self.sample_noise_distribution(data).to(data.device)
        return data + perturbation

inference.py CHANGED Viewed

@@ -87,7 +87,8 @@ def main(args):
         noisy_measurement, operator, noise_function, device,
         num_inference_steps=args.T,
         K=args.K,
-        model_type=model_type)
     print(f"total time {time.time() - t0}")
     save_to_image(output_image, os.path.join(args.output_dir, "output.png"))
@@ -97,11 +98,14 @@ if __name__ == '__main__':
     parser.add_argument("input_image", type=str)
     parser.add_argument("T", type=int)
     parser.add_argument("K", type=int)
-    parser.add_argument("model", type=str)
     parser.add_argument("operator_config", type=str)
     parser.add_argument("noise_config", type=str)
     parser.add_argument("model_config", type=str)
     parser.add_argument("--output-dir", default=".", type=str)
     parser.add_argument("--cuda", default=True, action=argparse.BooleanOptionalAction)
     main(parser.parse_args())

         noisy_measurement, operator, noise_function, device,
         num_inference_steps=args.T,
         K=args.K,
+        model_type=model_type,
+        loss_type=args.loss)
     print(f"total time {time.time() - t0}")
     save_to_image(output_image, os.path.join(args.output_dir, "output.png"))
     parser.add_argument("input_image", type=str)
     parser.add_argument("T", type=int)
     parser.add_argument("K", type=int)
     parser.add_argument("operator_config", type=str)
     parser.add_argument("noise_config", type=str)
     parser.add_argument("model_config", type=str)
     parser.add_argument("--output-dir", default=".", type=str)
+    parser.add_argument("--loss", type=str,
+        choices=['l2', 'kl', 'categorical_kl'], default='l2',
+        help="Algorithm to use. Options: 'l2', 'kl', 'categorical_kl'. Default is 'l2'."
+    )
     parser.add_argument("--cuda", default=True, action=argparse.BooleanOptionalAction)
     main(parser.parse_args())

noise_configs/bimodal_noise_config.yaml ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ name: "bimodal"
2	+ value: 0.75

noise_configs/poisson_noise_config.yaml CHANGED Viewed

	@@ -1,2 +1,2 @@
1	name: poisson
2	- rate: 0.1


1	name: poisson
2	+ rate: 0.05