|
|
import gradio as gr |
|
|
from functools import partial |
|
|
import torch |
|
|
import spaces |
|
|
|
|
|
import DDCM_blind_face_image_restoration |
|
|
import latent_DDCM_CCFG |
|
|
import latent_DDCM_compression |
|
|
from latent_models import load_model |
|
|
import os |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Normalise the SPACES_ZERO_GPU flag: a value of "true" is rewritten to "1";
# any other value (or an unset variable) is left untouched.
_zero_gpu_flag = os.environ.get("SPACES_ZERO_GPU")
if _zero_gpu_flag == "true":
    os.environ["SPACES_ZERO_GPU"] = "1"
|
|
|
|
|
|
|
|
# Load both Stable Diffusion 2.1 checkpoints once at import time, on the CPU,
# keyed by the resolution label used throughout the UI. Only the first element
# of what load_model returns is kept.
avail_models = {
    resolution: load_model(repo_id, 1000, float16=True, device=torch.device("cpu"), compile=False)[0]
    for resolution, repo_id in (
        ('512x512', 'sd2-community/stable-diffusion-2-1-base'),
        ('768x768', 'sd2-community/stable-diffusion-2-1'),
    )
}

# Compression entry point with the pre-loaded models already bound.
compression_func = partial(latent_DDCM_compression.main, avail_models=avail_models)
|
|
|
|
|
|
|
|
def get_t_and_k_from_file_name(file_name):
    """Extract T, K and the model type encoded in a bit-stream file name.

    Each value is read as the text that follows its tag letter (``T``, ``K``
    or ``M``) up to the next ``-``. Only the final path component is
    inspected, so tag letters that happen to occur in a directory name
    (for example a temp directory such as ``/var/folders/.../T/``) cannot
    corrupt the result.

    Args:
        file_name: File name or full path of the bit-stream file.

    Returns:
        A ``(T, K, model_type)`` tuple: two ints and a string.

    Raises:
        ValueError: If a tag is missing from the name, or the T/K field is
            not an integer.
    """
    base_name = os.path.basename(file_name)

    def _field(tag):
        # Same extraction rule as before: text between the first and second
        # occurrence of the tag, cut at the first '-'.
        pieces = base_name.split(tag)
        if len(pieces) < 2:
            raise ValueError(
                f"Bit-stream file name {base_name!r} does not contain the '{tag}' field")
        return pieces[1].split('-')[0]

    T = int(_field('T'))
    K = int(_field('K'))
    model_type = _field('M')
    return T, K, model_type
|
|
|
|
|
|
|
|
def ccfg(text_input, T, K, ccfg_scale, model_type, compressed_file_in=None):
    """Run latent_DDCM_CCFG.main with the guidance scale capped at K.

    All arguments are forwarded positionally in their original order; the
    pre-loaded ``avail_models`` mapping is passed as a keyword argument.
    """
    # The scale handed to the backend never exceeds the codebook size K.
    capped_scale = min(ccfg_scale, K)
    result = latent_DDCM_CCFG.main(
        text_input, T, K, capped_scale, model_type, compressed_file_in,
        avail_models=avail_models,
    )
    return result
|
|
|
|
|
|
|
|
|
|
|
@spaces.GPU
def decompress_given_bitstream(bitstream, method):
    """Decode an uploaded bit-stream with the decoder selected by *method*.

    T, K and the model type are parsed from the uploaded file's name (see
    ``get_t_and_k_from_file_name``), so the file must still carry the name
    it was produced with.

    Args:
        bitstream: Uploaded file object exposing ``.name``, or ``None`` when
            nothing was uploaded.
        method: One of ``'compression'``, ``'blind'`` or ``'ccfg'``.

    Returns:
        Whatever the selected decoder returns.

    Raises:
        gr.Error: If no bit-stream file was provided.
        NotImplementedError: If *method* is not one of the supported names.
    """
    if bitstream is None:
        # gr.Error is an exception: it must be raised to reach the user.
        # Merely constructing it let execution fall through to
        # ``bitstream.name`` and fail with an AttributeError.
        raise gr.Error("Please provide a bit-stream file when performing decompression")

    T, K, model_type = get_t_and_k_from_file_name(bitstream.name)

    if method == 'compression':
        return compression_func(None, T, K, model_type, bitstream)
    if method == 'blind':
        # 'NIQE', 1 and True fill the iqa_metric / iqa_coef / aligned slots
        # (same argument order as the restoration tab's inputs).
        return DDCM_blind_face_image_restoration.inference(None, T, K, 'NIQE', 1, True, bitstream)
    if method == 'ccfg':
        # -1 is a placeholder scale; presumably unused when decoding from a
        # bit-stream -- TODO confirm against latent_DDCM_CCFG.main.
        return ccfg(None, T, K, -1, model_type, bitstream)
    raise NotImplementedError(f"Unknown decompression method: {method!r}")
|
|
|
|
|
|
|
|
def validate_K(K):
    """Warn (without raising) when the codebook size K is not a power of two.

    Accepts ints as well as floats holding an integral value (``2048.0``).
    Anything else -- ``None``, a fractional or non-positive number -- is
    reported with the same warning instead of failing with a ``TypeError``
    on the bitwise test.

    Args:
        K: Codebook size as delivered by the UI number field.
    """
    is_integral = isinstance(K, int) or (isinstance(K, float) and K.is_integer())
    is_power_of_two = False
    if is_integral and K > 0:
        k = int(K)
        # A positive power of two has exactly one bit set.
        is_power_of_two = (k & (k - 1)) == 0
    if not is_power_of_two:
        gr.Warning("For efficient bit usage, K should be a power of 2.")
|
|
|
|
|
|
|
|
# One ready-to-wire decompression callback per application, keyed by method
# name; each is decompress_given_bitstream with ``method`` pre-bound.
method_to_func = {
    method_name: partial(decompress_given_bitstream, method=method_name)
    for method_name in ('compression', 'blind', 'ccfg')
}
|
|
|
|
|
# Page header: the title banner and the introductory HTML (authors,
# affiliation, links and a short description), both rendered with gr.HTML.
title = "<div style='text-align: center; font-size: 36px; font-weight: bold;'>Compressed Image Generation with Denoising Diffusion Codebook Models</div>"
# The affiliation heading is closed with </h4> so that it matches its <h4> opening tag.
intro = """
<h3 style="margin-bottom: 10px; text-align: center;">
<a href="https://ohayonguy.github.io/">Guy Ohayon*</a> ,
<a href="https://hilamanor.github.io/">Hila Manor*</a> ,
<a href="https://tomer.net.technion.ac.il/">Tomer Michaeli</a> ,
<a href="https://elad.cs.technion.ac.il/">Michael Elad</a>
</h3>
<p style="font-size: 12px; text-align: center; margin-bottom: 10px;">
* Equal contribution
</p>
<h4 style="margin-bottom: 10px; text-align: center;">
Technion - Israel Institute of Technology
</h4>
<h3 style="margin-bottom: 10px; text-align: center;">
<a href="https://www.arxiv.org/abs/2502.01189/">[Paper]</a> |
<a href="https://ddcm-2025.github.io/">[Project Page]</a> |
<a href="https://github.com/DDCM-2025/ddcm-compressed-image-generation/">[Code]</a>
</h3>
</br></br>
Denoising Diffusion Codebook Models (DDCM) is a novel (and simple) generative approach based on any Denoising Diffusion Model (DDM), that is able to produce high-quality image samples along with their losslessly compressed bit-stream representations.
DDCM can easily be utilized for perceptual image compression, as well as for solving a variety of compressed conditional generation tasks such as text-conditional image generation and image restoration, where each generated sample is accompanied by a compressed bit-stream.
</br></br>
The tabs below correspond to demos of different practical applications. Open each tab to see the application's specific instructions.
</br></br>
<b>Note: The demos below rely on relatively old pre-trained diffusion models such as Stable Diffusion 2.1
(Mirrored by <a href="https://huggingface.co/sd2-community">sd2-community</a>), simply for the purpose of demonstrating the capabilities of DDCM. Feel free to implement our DDCM-based methods using newer diffusion models to further improve performance.</b>
"""
|
|
|
|
|
# Footer text rendered with gr.Markdown at the bottom of the page:
# a request to star the repository, the BibTeX citation, license and contacts.
article = r"""
If you find our work useful, please ⭐ our <a href='https://github.com/DDCM-2025/ddcm-compressed-image-generation' target='_blank'>GitHub repository</a>. Thanks!

📝 **Citation**
```bibtex
@article{ohayon2025compressedimagegenerationdenoising,
title={Compressed Image Generation with Denoising Diffusion Codebook Models},
author={Guy Ohayon and Hila Manor and Tomer Michaeli and Michael Elad},
year={2025},
eprint={2502.01189},
journal={arXiv},
primaryClass={eess.IV},
url={https://arxiv.org/abs/2502.01189},
}
```

📋 **License**
This project is released under the <a rel="license" href="https://github.com/DDCM-2025/ddcm-compressed-image-generation/blob/master/LICENSE">MIT license</a>.

📧 **Contact**
If you have any questions, please feel free to contact us at <b>guyoep@gmail.com</b> (Guy Ohayon) and <b>hila.manor@campus.technion.ac.il</b> (Hila Manor).
"""
|
|
|
|
|
# Extra CSS handed to gr.Blocks(css=...): renders the tab buttons in 21px bold.
custom_css = """
.tabs button {
font-size: 21px !important;
font-weight: bold !important;
}
"""
|
|
|
|
|
# UI definition: page header, one tab per application, and a footer.
# Each tab has a "generate/compress" section wired to its backend function and
# a "decompress a previously generated bit-stream" section wired to the
# matching entry of method_to_func.
# Component names (input_image, T, K, bitstream, decompressed_image, ...) are
# re-bound from section to section; every event handler is wired right after
# the components it uses are created, before the names are reused.
# NOTE(review): the nesting of the layout context managers below was
# reconstructed from the statement order -- confirm against the intended layout.
with gr.Blocks(theme=gr.themes.Soft(), css=custom_css) as demo:
    gr.HTML(title)
    gr.HTML(intro)

    # ---- Tab 1: image compression -------------------------------------
    with gr.Tab("Image Compression"):
        gr.Markdown(
            "- To change the bit rate, modify the number of diffusion timesteps (T) and/or the codebook sizes (K).")
        gr.Markdown("- The input image will be center-cropped and resized to the specified size (512x512 or 768x768).")

        with gr.Row():
            with gr.Column(scale=2):
                input_image = gr.Image(label="Input image", scale=2, image_mode='RGB', type='pil')
                with gr.Group():
                    with gr.Row():
                        T = gr.Number(label="Diffusion timesteps (T)", minimum=50, maximum=1000, value=1000, scale=2)
                        K = gr.Number(label="Size of each codebook (K)", minimum=2, maximum=8192, value=2048, scale=3)
                    with gr.Row():
                        model_type = gr.Radio(["768x768", "512x512"], label="Image size", value="512x512")
                compress = gr.Button("Compress image")

            with gr.Column(scale=3):
                decompressed_image = gr.Image(label="Decompressed image", scale=2)
                compressed_file_out = gr.File(label="Compressed bit-stream (output)", scale=0)

        # validate_K runs first (it only emits a warning), then the compression itself.
        compress.click(validate_K, inputs=[K]).then(compression_func, inputs=[input_image, T, K, model_type],
                                                    outputs=[decompressed_image, compressed_file_out])

        # Each example row is [image path, T, K, model_type], matching `inputs`.
        gr.Examples([
            ["examples/compression/1.jpg", 1000, 256, '512x512'],
            ["examples/compression/2.jpg", 1000, 256, '512x512'],
            ["examples/compression/4.jpg", 1000, 256, '512x512'],
            ["examples/compression/7.jpg", 1000, 256, '512x512'],
            ["examples/compression/8.jpg", 1000, 256, '512x512'],
            ["examples/compression/13.jpg", 1000, 256, '512x512'],
            ["examples/compression/15.jpg", 1000, 256, '512x512'],
            ["examples/compression/17.jpg", 1000, 256, '512x512'],
            ["examples/compression/18.jpg", 1000, 256, '512x512'],
            ["examples/compression/19.jpg", 1000, 256, '512x512'],
            ["examples/compression/21.jpg", 1000, 256, '512x512'],
            ["examples/compression/22.jpg", 1000, 256, '512x512'],
            ["examples/compression/23.jpg", 1000, 256, '512x512'],
        ],
            inputs=[input_image, T, K, model_type],
            outputs=[decompressed_image, compressed_file_out],
            fn=compression_func,
            cache_examples='lazy')

        gr.Markdown("### Decompress a previously generated bit-stream")
        with gr.Row():
            with gr.Column(scale=2):
                bitstream = gr.File(label="Compressed bit-stream (input)", scale=0)
                decompress = gr.Button("Decompress image")

            with gr.Column(scale=3):
                decompressed_image = gr.Image(label="Decompressed image (from uploaded bit-stream)", scale=2)

        decompress.click(method_to_func['compression'], inputs=bitstream, outputs=decompressed_image)

    # ---- Tab 2: blind face image restoration ---------------------------
    with gr.Tab("Real-World Face Image Restoration"):
        gr.Markdown(
            "Please mark if your input face image is already aligned. "
            "If not, we will try to automatically detect, crop and align the faces, and raise an error if no faces are found. Expect better results if your input image is already aligned.")

        with gr.Row():
            with gr.Column(scale=2):
                with gr.Group():
                    # Unlike the compression tab, the image is passed on as a file path.
                    input_image = gr.Image(label="Input image", scale=2, type='filepath')
                    aligned = gr.Checkbox(label='Input face image is aligned')
                with gr.Group():
                    with gr.Row():
                        T = gr.Number(label="Diffusion timesteps (T)", minimum=50, maximum=1000, value=1000)
                        K = gr.Number(label="Size of each codebook (K)", minimum=2, maximum=8192, value=2048)
                    iqa_metric = gr.Radio(['NIQE', 'TOPIQ', 'CLIP-IQA'], label='Perceptual quality measure to optimize',
                                          value='NIQE')
                    iqa_coef = gr.Number(
                        label="Perception-distortion tradeoff coefficient (λ)",
                        info="Higher -> better perceptual quality",

                        minimum=0, maximum=1, value=1)
                restore = gr.Button("Restore and compress")

            with gr.Column(scale=3):
                # Results go to a gallery and a multi-file output.
                decompressed_image = gr.Gallery(label="Restored faces gallery", type="numpy", show_label=True,
                                                format="png")
                compressed_file_out = gr.File(label="Compressed bit-stream (output)", scale=0, file_count='multiple')

        restore.click(validate_K, inputs=[K]).then(DDCM_blind_face_image_restoration.inference,
                                                   inputs=[input_image, T, K, iqa_metric, iqa_coef, aligned],
                                                   outputs=[decompressed_image, compressed_file_out])
        # Each example row is [image path, T, K, iqa_metric, iqa_coef, aligned].
        gr.Examples([
            ["examples/bfr/00000055.png", 1000, 4096, 'TOPIQ', 0.1, True],
            ["examples/bfr/00000085.png", 1000, 4096, 'TOPIQ', 0.1, True],
            ["examples/bfr/00000113.png", 1000, 4096, 'TOPIQ', 0.1, True],
            ["examples/bfr/00000137.png", 1000, 4096, 'TOPIQ', 0.1, True],
            ["examples/bfr/wider/0034.jpg", 1000, 4096, 'NIQE', 1, True],
            ["examples/bfr/webphoto/00042_00.jpg", 1000, 4096, 'TOPIQ', 0.1, True],
            ["examples/bfr/lfw/Ana_Palacio_0001_00.jpg", 1000, 4096, 'TOPIQ', 0.1, True],
            ["examples/bfr/01.png", 1000, 4096, 'NIQE', 0.1, False],
            ["examples/bfr/03.jpg", 1000, 4096, 'TOPIQ', 0.1, False],
        ],
            inputs=[input_image, T, K, iqa_metric, iqa_coef, aligned],
            outputs=[decompressed_image, compressed_file_out],
            fn=DDCM_blind_face_image_restoration.inference,
            cache_examples='lazy')

        gr.Markdown("### Decompress a previously generated bit-stream")
        with gr.Row():
            with gr.Column(scale=2):
                bitstream = gr.File(label="Compressed bit-stream (input)", scale=0)
                decompress = gr.Button("Decompress image")

            with gr.Column(scale=3):
                # Decompression output here is a single image component, not the gallery above.
                decompressed_image = gr.Image(label="Decompressed image (from uploaded bit-stream)", scale=2)

        decompress.click(method_to_func['blind'], inputs=bitstream, outputs=decompressed_image)

    # ---- Tab 3: compressed text-to-image generation --------------------
    with gr.Tab("Compressed Text-to-Image Generation"):
        gr.Markdown(
            "This application demonstrates the capabilities of our new *compressed* classifier-free guidance method, which *does not require the input condition for decompression*."
            " \n"
            "Each image is generated along with its compressed bit-stream representation, and the input condition is implicitly encoded in the bit-stream.")

        with gr.Row():
            with gr.Column(scale=2):
                with gr.Group():
                    text_input = gr.Textbox(label="Input text prompt", scale=1, value="An image of a dog")
                    with gr.Row():
                        T = gr.Number(label="Diffusion timesteps (T)", minimum=50, maximum=1000, value=1000, scale=1)
                        K = gr.Number(label="Size of each codebook (K)", minimum=2, maximum=256, value=128, scale=1)
                        # K_tilde is passed to ccfg as ccfg_scale, where it is capped at K.
                        K_tilde = gr.Number(label=r"Sub-sampled codebooks' sizes (K̃)", scale=1,
                                            info="Behaves like a guidance scale", minimum=2, maximum=256, value=32)
                    model_type = gr.Radio(["768x768", "512x512"], label="Image size", value="512x512")
                button = gr.Button("Generate and compress")

            with gr.Column(scale=3):
                decompressed_image = gr.Image(label="Generated image", scale=2)
                compressed_file_out = gr.File(label="Compressed bit-stream (output)", scale=0)

        button.click(validate_K, inputs=[K]).then(ccfg, inputs=[text_input, T, K, K_tilde, model_type],
                                                  outputs=[decompressed_image, compressed_file_out])

        # Each example row is [prompt, T, K, K_tilde, model_type].
        gr.Examples([
            ["An image of a dog", 1000, 64, 4, '512x512'],
            ["Rainbow over the mountains", 1000, 64, 4, '512x512'],
            ["A cat playing soccer", 1000, 64, 4, '512x512'],
        ],
            inputs=[text_input, T, K, K_tilde, model_type],
            outputs=[decompressed_image, compressed_file_out],
            fn=ccfg,
            cache_examples='lazy')
        gr.Markdown("### Decompress a previously generated bit-stream")
        with gr.Row():
            with gr.Column(scale=2):
                bitstream = gr.File(label="Compressed bit-stream (input)", scale=0)
                # `button` is re-bound here; the generate handler above is already wired.
                button = gr.Button("Decompress")
            with gr.Column(scale=3):
                decompressed_image = gr.Image(label="Decompressed image (from uploaded bit-stream)", scale=2)
        button.click(method_to_func['ccfg'], inputs=bitstream, outputs=decompressed_image)

    # Footer: citation, license and contact details.
    gr.Markdown(article)
|
|
|
|
|
# Turn on the request queue and start serving; queue() returns the Blocks
# instance, so the two calls can be chained.
demo.queue().launch(state_session_capacity=500)
|
|
|
|
|
|