# Hugging Face Space: binaychandra/svd_demo (status: Sleeping; file size: 6,232 Bytes)

import gradio as gr
import numpy as np
import cv2
from PIL import Image
import sys
import os

def get_size_in_mb(array):
    """Return the size of ``array`` in megabytes (1 MB = 1024 * 1024 bytes).

    The byte count is read from the object's ``nbytes`` attribute.
    """
    bytes_per_mb = 1024 * 1024
    return array.nbytes / bytes_per_mb

def load_default_image():
    """Open the bundled default image, if it is present.

    Looks for ``kittens_cute.jpg`` in the directory that contains this
    script.

    Returns:
        The result of ``Image.open`` on that file, or ``None`` when the
        file does not exist.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    candidate = os.path.join(here, "kittens_cute.jpg")
    if not os.path.exists(candidate):
        return None
    return Image.open(candidate)

def _bytes_to_human_readable(num_bytes):
    """Format a byte count with a 1024-based unit suffix (B, KB, MB, GB, TB, PB)."""
    for unit in ("B", "KB", "MB", "GB", "TB"):
        if num_bytes < 1024:
            return f"{num_bytes:.2f} {unit}"
        num_bytes /= 1024
    # Five divisions by 1024 have been applied here, so the value is in PB.
    return f"{num_bytes:.2f} PB"


def perform_svd(image, r):
    """Build a rank-``r`` approximation of a grayscale version of ``image``.

    Args:
        image: Anything ``np.array`` can turn into a 2-D (grayscale) or
            3-D (colour) array, typically a PIL image; ``None`` means no
            image was supplied.
        r: Number of singular values to keep. It is converted with
            ``int()`` and clamped to ``[1, min(height, width)]``.

    Returns:
        A ``(reconstructed_image, info_text)`` tuple. On success the first
        item is a PIL image and the second a multi-line statistics string.
        When ``image`` is ``None`` or processing fails, the first item is
        ``None`` and the second is a message for the user.
    """
    if image is None:
        return None, "Please upload an image first."

    # Any failure is reported back as text rather than raised, so the UI
    # callback always receives an (image, message) pair.
    try:
        image_array = np.array(image)

        # Collapse colour input to a single channel.
        if image_array.ndim == 3:
            image_array = cv2.cvtColor(image_array, cv2.COLOR_RGB2GRAY)

        U, S, Vt = np.linalg.svd(image_array, full_matrices=False)

        # Coerce to int (a float cannot be used as a slice bound) and keep
        # the rank inside [1, min(height, width)]; a negative value would
        # otherwise silently slice from the end of S.
        r = max(1, min(int(r), min(image_array.shape)))

        # Scale the first r columns of U by the singular values instead of
        # materialising a dense diagonal matrix.
        reconstructed = (U[:, :r] * S[:r]) @ Vt[:r, :]

        # Round before the uint8 cast: a plain cast truncates, which turns
        # values such as 99.9999 into 99 even at full rank.
        reconstructed = np.clip(np.rint(reconstructed), 0, 255).astype(np.uint8)

        height, width = image_array.shape
        # Element counts of the truncated factors: U (height x r),
        # the r singular values, and Vt (r x width).
        decomposed_elements = (height * r) + r + (width * r)
        # Both sizes are estimated at the source array's element width
        # (image_array.itemsize), not at the width of the float factors.
        decomposed_bytes = decomposed_elements * image_array.itemsize
        original_bytes = image_array.size * image_array.itemsize

        total_sv = float(np.sum(S))
        if total_sv > 0:
            energy_pct = float(np.sum(S[:r])) / total_sv * 100
        else:
            # All singular values are zero (e.g. an all-black image): the
            # reconstruction is exact, and 0/0 would otherwise print "nan".
            energy_pct = 100.0

        decomposed_hr = _bytes_to_human_readable(decomposed_bytes)
        original_hr = _bytes_to_human_readable(original_bytes)
        info = f"Original Image: {height}x{width} => {image_array.size} elements => {original_hr}\n" \
               f"Decomposed Image: ({height}*{r} + {r} + {width}*{r}) => {decomposed_elements} => {decomposed_hr}\n" \
               f"Energy Retained: {energy_pct:.2f}%"

        return Image.fromarray(reconstructed), info
    except Exception as e:
        return None, f"Error processing image: {str(e)}"

def clear_outputs():
    """Return the reset values: no image and the waiting placeholder text."""
    cleared_image = None
    placeholder_text = "Waiting for generation..."
    return cleared_image, placeholder_text

def get_image_svd_info(image):
    """Build a ``gr.update`` that re-ranges the R slider for ``image``.

    With an image, the maximum becomes the smaller dimension of its
    grayscale array and the value becomes 50 or that maximum, whichever
    is lower. With ``None``, the update carries value 50 and maximum 100.
    """
    if image is not None:
        pixels = np.array(image)
        # Colour input is reduced to one channel before reading its shape.
        if pixels.ndim == 3:
            pixels = cv2.cvtColor(pixels, cv2.COLOR_RGB2GRAY)

        rank_cap = min(pixels.shape)
        starting_rank = min(rank_cap, 50)
        return gr.update(value=starting_rank, maximum=rank_cap)

    return gr.update(value=50, maximum=100)

# Gradio interface
def svd_interface(image, r):
    """Click handler: run ``perform_svd`` and pass its (image, text) pair through."""
    reconstructed, stats = perform_svd(image, r)
    return reconstructed, stats

# Build the UI: a two-column layout with the input image, R slider and
# Generate button on the left, and the reconstructed image plus statistics
# on the right. The custom CSS hides the page footer.
with gr.Blocks(theme=gr.themes.Soft(), css="footer {visibility: hidden !important;}") as demo:
    gr.Markdown("""# Image Compression using SVD
    Upload an image and adjust the number of singular values (R), then click 'Generate' to see the decomposed image.
    """)

    with gr.Row():
        with gr.Column(scale=1):
            # Load the default image (may be None) so it can pre-populate the
            # input and size the initial R slider range.
            default_img = load_default_image()
            image_input = gr.Image(
                type="pil", 
                label="Upload Image", 
                height=450,
                width=450,
                sources=["upload", "webcam"],
                #sources=[],
                value=default_img
            )
            
            with gr.Column():
                # Fallback slider maximum used when no default image is available.
                initial_max = 100
                if default_img is not None:
                    # Same computation as get_image_svd_info: the slider maximum is
                    # the smaller dimension of the grayscale array.
                    img_array = np.array(default_img)
                    if len(img_array.shape) == 3:
                        img_array = cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)
                    initial_max = min(img_array.shape)
                
                # NOTE(review): with minimum=1 and step=5 the default value 50 is not
                # on the 1, 6, 11, ... grid; confirm the intended step.
                r_slider = gr.Slider(
                    minimum=1,
                    maximum=initial_max,
                    step=5,
                    value=min(50, initial_max),
                    label="Number of Singular Values (R)"
                )
            # Row specifically for the button, placed after the slider
            with gr.Row():
                # Left spacer column (scale 2 of 2 + 6 + 2, nominally 20% of the row)
                gr.Column(scale=2, min_width=0)
                # Middle column for the button (scale 6, nominally 60% of the row)
                with gr.Column(scale=6, min_width=0):
                     # Button takes full width of this middle column
                     submit_btn = gr.Button("Generate", variant="primary", scale=1)
                # Right spacer column (scale 2, nominally 20% of the row)
                gr.Column(scale=2, min_width=0)
        with gr.Column(scale=1):
            output_image = gr.Image(
                label="Reconstructed Image",
                height=450,
                width=450
            )
            info_output = gr.Textbox(
                label="Image Statistics",
                interactive=False,
                lines=4,
                value="Click Generate to see the decomposition"
            )

    # When the input image changes, first re-range the R slider for the new
    # image, then reset the output image and statistics text.
    image_input.change(
        fn=get_image_svd_info,
        inputs=[image_input],
        outputs=[r_slider],
        queue=False
    ).then(
        fn=clear_outputs,
        inputs=[],
        outputs=[output_image, info_output],
        queue=False
    )

    # Link submit button to SVD processing: the image and slider value go in,
    # the reconstructed image and statistics text come out.
    submit_btn.click(
        fn=svd_interface,
        inputs=[image_input, r_slider],
        outputs=[output_image, info_output]
    )

    gr.Markdown("""
    ### Tips:
    - Try different R values and click Generate to see the effect
    - Higher R preserves more details but results in larger file size
    - The compression ratio shows how much smaller the compressed version is
    """)

# Start the app; this runs at module level, so it also runs on import.
demo.launch()