Spaces:

leowajda
/

diffusion_model

Paused

App Files Files Community

leowajda committed on Dec 29, 2023

Commit

7578496

1 Parent(s): 7014ab1

initial commit

Browse files

Files changed (5) hide show

.gitignore +1 -0
README.md +4 -4
app.py +130 -0
diffusion_sampler.py +152 -0
requirements.txt +180 -0

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ .idea

README.md CHANGED Viewed

@@ -1,10 +1,10 @@
 ---
-title: Diffusion Model
 emoji: 📈
-colorFrom: pink
-colorTo: purple
 sdk: gradio
-sdk_version: 4.12.0
 app_file: app.py
 pinned: false
 license: agpl-3.0

 ---
+title: Temp Diffusion
 emoji: 📈
+colorFrom: indigo
+colorTo: pink
 sdk: gradio
+sdk_version: 4.10.0
 app_file: app.py
 pinned: false
 license: agpl-3.0

app.py ADDED Viewed

	@@ -0,0 +1,130 @@

+import gradio as gr
+from huggingface_hub import from_pretrained_keras
+from diffusion_sampler import DiffusionSampler
+# Radio input choosing which pretrained model is used, by the noise schedule it was trained with.
+# It is the first entry of the Interface `inputs` list, so it arrives in call_model as `model_to_call`.
+scheduler_button = gr.Radio(
+    choices=["Linear", "Cosine"],
+    label="Noise Scheduler",
+    value="Linear",
+    info="""
+        Decides whether to employ a model trained with a linear scheduler,
+        as proposed by Jonathan Ho et al. in 'Denoising Diffusion Probabilistic Models',
+        or the cosine variant introduced by Alex Nichol et al. in 'Improved Denoising Diffusion Probabilistic Models'.
+    """,
+)
+sampling_button = gr.Radio(
+    choices=["DDPM", "DDIM"],
+    label="Sampling Procedure",
+    value="DDPM",
+    info="""
+        Selects either the stocasthic sampling procedure described by Jonathan Ho et al. in 'Denoising Diffusion Probabilistic Models',
+        or the implicit variant proposed by Jiaming Song et al. in 'Denoising Diffusion Implicit Models'.
+        For the latter, it is also necessary to specify the sub-sequence strategy and the number of sampling steps.
+    """,
+)
+# Radio input for the DDIM timestep sub-sequence; call_model lower-cases the value
+# before passing it on as `step_strategy` ("linear" or "quadratic").
+subsequence_button = gr.Radio(
+    choices=["Linear", "Quadratic"],
+    label="Sub-Sequence",
+    value="Linear",
+    info="""
+        Specific to DDIM sampling, this parameter chooses the procedure
+        for forming the sub-sequence employed during the sampling process.
+    """,
+)
+# Checkbox toggling between the EMA-weight model and the raw-weight model (passed as `ema`).
+ema_button = gr.Checkbox(
+    value=True,
+    label="Exponential Moving Average",
+    info="""
+        Whether to invoke the network with the applied exponential moving average on the model parameters.
+        Recommended for better results.
+    """
+)
+# Integer batch-size input (precision=0), limited to 1..64 images per request.
+images_button = gr.Number(
+    label="Number of images to generate",
+    value=5,
+    precision=0,
+    minimum=1,
+    maximum=64,
+    info="""
+        The number of images to be generated.
+        Larger batch sizes result in longer inference times.
+    """
+)
+# Slider for the number of sampling steps, limited to 500..1000.
+# NOTE(review): per Gradio's Slider docs, randomize=True appears to pick the initial value at random,
+# which would override value=1_000 on page load - confirm this is intended.
+step_button = gr.Slider(
+    minimum=500,
+    value=1_000,
+    maximum=1_000,
+    randomize=True,
+    label="Number of sampling steps",
+    info="""
+        Relevant exclusively to DDIM sampling, this parameter determines the number of steps to be utilized during sampling.
+        The default value is set to 1000 in the case of DDPM sampling.
+    """
+)
+# Output component that displays the array of images returned by call_model.
+gallery = gr.Gallery(
+    label="""
+        Generated Flowers
+    """
+)
+linear_diffusion_model = DiffusionSampler(
+    model=from_pretrained_keras("leowajda/linear_diffusion"),
+    ema_model=from_pretrained_keras("leowajda/linear_diffusion_ema"),
+    noise_scheduler="cosine",
+)
+cosine_diffusion_model = DiffusionSampler(
+    model=from_pretrained_keras("leowajda/cosine_diffusion"),
+    ema_model=from_pretrained_keras("leowajda/cosine_diffusion_ema"),
+    noise_scheduler="cosine",
+)
+def call_model(
+        model_to_call: str,
+        sample_strategy: str = "ddim",
+        step_strategy: str = "uniform",
+        ema: bool = True,
+        steps: int = 1_000,
+        num_images: int = 0,
+):
+    diffusion_model = linear_diffusion_model if model_to_call.lower() == "linear" else cosine_diffusion_model
+    return diffusion_model.generate_images(
+        num_images=int(num_images),
+        steps=int(steps),
+        sample_strategy=sample_strategy.lower(),
+        step_strategy=step_strategy.lower(),
+        ema=ema,
+    )
+# Gradio interface wiring the input widgets to call_model and showing the result in the gallery.
+demo = gr.Interface(
+    fn=call_model,
+    # Order matters: the widgets are passed positionally to call_model
+    # (model_to_call, sample_strategy, step_strategy, ema, steps, num_images).
+    inputs=[scheduler_button, sampling_button, subsequence_button, ema_button, step_button, images_button],
+    outputs=gallery,
+    cache_examples=False,
+    title="""Unconditional Image Generation Through Denoising Diffusion Implicit Models""",
+    # Each example row follows the same order as `inputs`.
+    examples=[
+        ["Linear", "DDPM", "Linear", True, 1_000, 10],
+        ["Cosine", "DDIM", "Linear", True, 750, 20],
+        ["Linear", "DDIM", "Quadratic", True, 750, 20]
+    ],
+    description="""
+        <p align="center">
+            Supervisor: <strong>Wojciech Oronowicz – Jaśkowiak, PhD</strong>
+            &emsp;
+            Author: <strong>Leonardo Wajda</strong>
+            &emsp;
+            Specialization: <strong>Intelligent Data Processing Systems</strong>
+        </p>
+    """,
+)
+# Enable request queuing, then start the server; this runs at import time of app.py.
+demo.queue().launch()

diffusion_sampler.py ADDED Viewed

	@@ -0,0 +1,152 @@

+import math
+from collections.abc import Callable
+import numpy as np
+import tqdm as tqdm
+import tensorflow as tf
+from tensorflow import keras
+from keras.models import load_model
+def as_float32(t: tf.Tensor) -> tf.Tensor:
+    return tf.cast(t, dtype=tf.float32)
+def batch_reshape(t: tf.Tensor, x: tf.Tensor) -> tf.Tensor:
+    def inner_function(coeff: tf.Tensor) -> tf.Tensor:
+        batch_dim = tf.shape(x)[0]
+        return tf.reshape(tf.gather(coeff, t), [batch_dim, 1, 1, 1])
+    return inner_function
+class DiffusionSampler(keras.Model):
+    def __init__(
+        self,
+        model: keras.Model | str,
+        ema_model: keras.Model | str,
+        timesteps: int | None = 1_000,
+        beta_start: float | None = 1e-4,
+        beta_end: float | None = 0.02,
+        noise_scheduler: str = "linear",
+        ema: float = 0.999,
+        **kwargs,
+    ):
+        super().__init__(**kwargs)
+        self.noise_predictor = load_model(filepath=model, safe_mode=False) if isinstance(model, str) else model
+        self.ema_noise_predictor = load_model(filepath=ema_model, safe_mode=False) if isinstance(model,
+                                                                                                 str) else ema_model
+        self.ema = ema
+        self.beta_start = beta_start
+        self.beta_end = beta_end
+        self.timesteps = timesteps
+        betas = self.noise_scheduler(noise_scheduler)
+        alphas = 1.0 - betas
+        alphas_cum_prod = tf.math.cumprod(alphas, axis=0)
+        alphas_cum_prod_prev = tf.concat([tf.constant([1.0], dtype=tf.float64), alphas_cum_prod[:-1]], axis=0)
+        posterior_variances = betas * (1.0 - alphas_cum_prod_prev) / (1.0 - alphas_cum_prod)
+        self.betas = as_float32(betas)
+        self.posterior_variances = as_float32(posterior_variances)
+        self.alphas_cum_prod_prev = as_float32(alphas_cum_prod_prev)
+        self.one_minus_alphas_cum_prod = as_float32(1.0 - alphas_cum_prod)
+        self.one_minus_alphas_cum_prod_prev = as_float32(1.0 - alphas_cum_prod_prev)
+        self.sqrt_one_minus_alphas_cum_prod = as_float32(tf.sqrt(1.0 - alphas_cum_prod))
+        self.sqrt_alphas_cum_prod_prev = as_float32(tf.sqrt(alphas_cum_prod_prev))
+        self.sqrt_alphas_cum_prod = as_float32(tf.sqrt(alphas_cum_prod))
+        self.rev_sqrt_alphas_cum_prod = as_float32(1.0 / tf.sqrt(alphas_cum_prod))
+        self.rev_sqrt_alphas = as_float32(tf.sqrt(1.0 / alphas))
+    def ddpm_sample(self, pred_noise: tf.Tensor, x_t: tf.Tensor, t: tf.Tensor) -> tf.Tensor:
+        batch_dim = tf.shape(x_t)[0]
+        at_timestep = batch_reshape(t, x_t)
+        beta = at_timestep(self.betas)
+        rev_sqrt_alpha = at_timestep(self.rev_sqrt_alphas)
+        sqrt_one_minus_alpha_cum_prod = at_timestep(self.sqrt_one_minus_alphas_cum_prod)
+        posterior_variance = at_timestep(self.posterior_variances)
+        mean = rev_sqrt_alpha * (
+            x_t - (beta / sqrt_one_minus_alpha_cum_prod) * pred_noise
+        )
+        nonzero_mask = tf.reshape(
+            1 - tf.cast(tf.equal(t, 0), dtype=tf.float32), [batch_dim, 1, 1, 1]
+        )
+        random_noise = tf.random.normal(shape=x_t.shape, dtype=x_t.dtype)
+        return mean + nonzero_mask * tf.sqrt(posterior_variance) * random_noise
+    def ddim_sample(self, pred_noise: tf.Tensor, x_t: tf.Tensor, t: tf.Tensor, eta: float = 0.0) -> tf.Tensor:
+        at_timestep = batch_reshape(t, x_t)
+        sqrt_alpha_cum_prod_prev = at_timestep(self.sqrt_alphas_cum_prod_prev)
+        rev_sqrt_alpha_cum_prod = at_timestep(self.rev_sqrt_alphas_cum_prod)
+        sqrt_one_minus_alpha_cum_prod = at_timestep(self.sqrt_one_minus_alphas_cum_prod)
+        alpha_cum_prod_prev = at_timestep(self.alphas_cum_prod_prev)
+        one_minus_alpha_cum_prod = at_timestep(self.one_minus_alphas_cum_prod)
+        one_minus_alpha_cum_prod_prev = at_timestep(self.one_minus_alphas_cum_prod_prev)
+        x0_t = (
+            (x_t - (sqrt_one_minus_alpha_cum_prod * pred_noise)) * rev_sqrt_alpha_cum_prod
+        )
+        c1 = eta * tf.sqrt(
+            (one_minus_alpha_cum_prod_prev / one_minus_alpha_cum_prod) * (
+                    one_minus_alpha_cum_prod / alpha_cum_prod_prev)
+        )
+        x_t_dir = tf.sqrt(one_minus_alpha_cum_prod_prev - tf.square(c1))
+        random_noise = tf.random.normal(shape=x_t.shape, dtype=x_t.dtype)
+        return sqrt_alpha_cum_prod_prev * x0_t + x_t_dir * pred_noise + c1 * random_noise
+    def noise_scheduler(self, scheduler: str, max_beta: int = 0.02) -> tf.Tensor:
+        alpha_bar = lambda t: tf.math.cos((t + 0.008) / 1.008 * tf.constant(math.pi, dtype=tf.float64) / 2) ** 2
+        cosine_scheduler = lambda i: tf.minimum(
+            1 - alpha_bar((i + 1) / tf.cast(self.timesteps, dtype=tf.float64)) / alpha_bar(
+                i / tf.cast(self.timesteps, dtype=tf.float64)), max_beta)
+        if scheduler == "linear":
+            x = tf.linspace(start=self.beta_start, stop=self.beta_end, num=self.timesteps)
+            return tf.cast(x, dtype=tf.float64)
+        elif scheduler == "cosine":
+            x = tf.vectorized_map(fn=cosine_scheduler, elems=tf.range(self.timesteps, dtype=tf.float64))
+            return tf.cast(x, dtype=tf.float64)
+    def x_t(self, x_start: tf.Tensor, t: tf.Tensor, noise: tf.Tensor) -> tf.Tensor:
+        at_timestep = batch_reshape(t, x_start)
+        sqrt_alpha_cum_prod = at_timestep(self.sqrt_alphas_cum_prod)
+        sqrt_one_minus_alpha_cum_prod = at_timestep(self.sqrt_one_minus_alphas_cum_prod)
+        return sqrt_alpha_cum_prod * x_start + sqrt_one_minus_alpha_cum_prod * noise
+    def generate_images(
+        self,
+        num_images: int,
+        steps: int,
+        sample_strategy: str = "ddim",
+        step_strategy: str = "uniform",
+        ema: bool = True,
+    ):
+        sampling_stategies = {
+            ("ddpm", "linear"): (self.ddpm_sample, tf.range(self.timesteps, dtype=tf.float64)),
+            ("ddpm", "quadratic"): (self.ddpm_sample, tf.range(self.timesteps, dtype=tf.float64)),
+            ("ddim", "linear"): (self.ddim_sample, tf.range(steps, dtype=tf.float64)),
+            ("ddim", "quadratic"): (self.ddim_sample, tf.cast(tf.linspace(start=0.0, stop=tf.sqrt(self.timesteps * 0.8), num=steps) ** 2, dtype=tf.float64))
+        }
+        noise_predictor = self.ema_noise_predictor if ema else self.noise_predictor
+        sampler, seq = sampling_stategies[(sample_strategy, step_strategy)]
+        samples = tf.random.normal(shape=(num_images, 64, 64, 3), dtype=tf.float32)
+        for t in tqdm.tqdm(tf.reverse(seq, axis=[0])):
+            tt = tf.cast(tf.fill(dims=(num_images,), value=t), dtype=tf.int64)
+            pred_noise = noise_predictor.predict([samples, tt], verbose=0, batch_size=num_images)
+            samples = sampler(pred_noise, samples, tt, )
+        return (
+            tf.clip_by_value(samples * 127.5 + 127.5, 0.0, 255.0)
+            .numpy()
+            .astype(np.uint8)
+        )

requirements.txt ADDED Viewed

	@@ -0,0 +1,180 @@

+absl-py==1.4.0
+aiofiles==23.2.1
+altair==5.2.0
+annotated-types==0.6.0
+anyio==3.7.1
+argon2-cffi==23.1.0
+argon2-cffi-bindings==21.2.0
+array-record==0.5.0
+arrow==1.3.0
+asttokens==2.4.1
+astunparse==1.6.3
+async-lru==2.0.4
+attrs==23.1.0
+Babel==2.13.1
+beautifulsoup4==4.12.2
+bleach==6.1.0
+cachetools==5.3.2
+certifi==2023.11.17
+cffi==1.16.0
+charset-normalizer==3.3.2
+click==8.1.7
+colorama==0.4.6
+comm==0.2.0
+contourpy==1.2.0
+cycler==0.12.1
+debugpy==1.8.0
+decorator==5.1.1
+defusedxml==0.7.1
+dm-tree==0.1.8
+etils==1.5.2
+exceptiongroup==1.2.0
+executing==2.0.1
+fastapi==0.104.1
+fastjsonschema==2.19.0
+ffmpy==0.3.1
+filelock==3.13.1
+flatbuffers==23.5.26
+fonttools==4.46.0
+fqdn==1.5.1
+fsspec==2023.12.0
+gast==0.5.4
+google-auth==2.24.0
+google-auth-oauthlib==1.0.0
+google-pasta==0.2.0
+googleapis-common-protos==1.61.0
+gradio==4.8.0
+gradio_client==0.7.1
+graphviz==0.20.1
+grpcio==1.59.3
+h11==0.14.0
+h5py==3.10.0
+httpcore==1.0.2
+httpx==0.25.2
+huggingface-hub==0.19.4
+idna==3.6
+importlib-resources==6.1.1
+ipykernel==6.27.1
+ipython==8.18.1
+ipywidgets==8.1.1
+isoduration==20.11.0
+jedi==0.19.1
+Jinja2==3.1.2
+json5==0.9.14
+jsonpointer==2.4
+jsonschema==4.20.0
+jsonschema-specifications==2023.11.2
+jupyter==1.0.0
+jupyter-console==6.6.3
+jupyter-events==0.9.0
+jupyter-lsp==2.2.1
+jupyter_client==8.6.0
+jupyter_core==5.5.0
+jupyter_server==2.11.2
+jupyter_server_terminals==0.4.4
+jupyterlab==4.0.9
+jupyterlab-widgets==3.0.9
+jupyterlab_pygments==0.3.0
+jupyterlab_server==2.25.2
+keras==2.15.0
+kiwisolver==1.4.5
+libclang==16.0.6
+Markdown==3.5.1
+markdown-it-py==3.0.0
+MarkupSafe==2.1.3
+matplotlib==3.8.2
+matplotlib-inline==0.1.6
+mdurl==0.1.2
+mistune==3.0.2
+ml-dtypes==0.2.0
+nbclient==0.9.0
+nbconvert==7.12.0
+nbformat==5.9.2
+nest-asyncio==1.5.8
+notebook==7.0.6
+notebook_shim==0.2.3
+numpy==1.26.2
+oauthlib==3.2.2
+opt-einsum==3.3.0
+orjson==3.9.10
+overrides==7.4.0
+packaging==23.2
+pandas==2.1.4
+pandocfilters==1.5.0
+parso==0.8.3
+pexpect==4.9.0
+Pillow==10.1.0
+platformdirs==4.1.0
+prometheus-client==0.19.0
+promise==2.3
+prompt-toolkit==3.0.41
+protobuf==3.20.3
+psutil==5.9.6
+ptyprocess==0.7.0
+pure-eval==0.2.2
+pyasn1==0.5.1
+pyasn1-modules==0.3.0
+pycparser==2.21
+pydantic==2.5.2
+pydantic_core==2.14.5
+pydot==1.4.2
+pydub==0.25.1
+Pygments==2.17.2
+pyparsing==3.1.1
+python-dateutil==2.8.2
+python-json-logger==2.0.7
+python-multipart==0.0.6
+pytz==2023.3.post1
+PyYAML==6.0.1
+pyzmq==25.1.1
+qtconsole==5.5.1
+QtPy==2.4.1
+referencing==0.31.1
+requests==2.31.0
+requests-oauthlib==1.3.1
+rfc3339-validator==0.1.4
+rfc3986-validator==0.1.1
+rich==13.7.0
+rpds-py==0.13.2
+rsa==4.9
+semantic-version==2.10.0
+Send2Trash==1.8.2
+shellingham==1.5.4
+six==1.16.0
+sniffio==1.3.0
+soupsieve==2.5
+stack-data==0.6.3
+starlette==0.27.0
+tensorboard==2.15.1
+tensorboard-data-server==0.7.2
+tensorflow==2.15.0
+tensorflow-datasets==4.9.3
+tensorflow-estimator==2.15.0
+tensorflow-io-gcs-filesystem==0.34.0
+tensorflow-metadata==1.14.0
+termcolor==2.4.0
+terminado==0.18.0
+tinycss2==1.2.1
+toml==0.10.2
+tomli==2.0.1
+tomlkit==0.12.0
+toolz==0.12.0
+tornado==6.4
+tqdm==4.66.1
+traitlets==5.14.0
+typer==0.9.0
+types-python-dateutil==2.8.19.14
+typing_extensions==4.8.0
+tzdata==2023.3
+uri-template==1.3.0
+urllib3==2.1.0
+uvicorn==0.24.0.post1
+wcwidth==0.2.12
+webcolors==1.13
+webencodings==0.5.1
+websocket-client==1.7.0
+websockets==11.0.3
+Werkzeug==3.0.1
+widgetsnbextension==4.0.9
+wrapt==1.14.1
+zipp==3.17.0