"""Gradio demo: neural style transfer driven by VGG19 feature activations."""

import gradio as gr
import tensorflow as tf
import numpy as np
from PIL import Image

# Load VGG19 with ImageNet weights and without its top classifier layers
# (include_top=False); below it is only ever called to produce activations.
vgg = tf.keras.applications.VGG19(include_top=False, weights="imagenet")
# Freeze the network: the optimisation loop in style_transfer updates the
# generated image, never the VGG weights.
vgg.trainable = False

# Placeholders: each is a list holding a literal Ellipsis until real layer
# names are filled in.
# NOTE(review): nothing in the visible code reads these two constants, so
# calls to vgg(...) only return the model's final output — confirm intent.
STYLE_LAYERS = [...]  # your style layers
CONTENT_LAYER = [...] # your content layer(s)


def gram_matrix(A):
    """Compute the Gram matrix of a feature tensor for style representation.

    Accepts activations of rank 4 ``(batch, H, W, C)``, rank 3
    ``(batch, N, C)`` or rank 2 ``(N, C)``. All axes between the first and the
    last are flattened into a single "positions" axis before the channel
    correlations are taken.

    Args:
        A: Tensor (or anything ``tf.convert_to_tensor`` accepts) whose last
            axis is the channel axis.

    Returns:
        A ``(batch, C, C)`` tensor of channel correlations divided by the
        number of positions. Rank-2 input is treated as a batch of one, so
        the result then has a leading axis of size 1.

    Raises:
        ValueError: If the rank of ``A`` is not 2, 3 or 4.
    """
    A = tf.convert_to_tensor(A)

    rank = A.shape.rank
    if rank not in (2, 3, 4):
        raise ValueError(f"Unexpected tensor rank for gram_matrix: {A.shape}")
    if rank == 2:
        # Promote (N, C) to a batch of one so every rank shares one code path.
        A = tf.expand_dims(A, 0)

    # Use the dynamic shape: static dimensions can be None (for example inside
    # tf.function), and None cannot be fed to tf.reshape or multiplied.
    dims = tf.shape(A)
    num_positions = tf.reduce_prod(dims[1:-1])
    features = tf.reshape(A, tf.stack([dims[0], num_positions, dims[-1]]))

    # Divide in the tensor's own dtype so non-float32 inputs are not rejected
    # for mixing dtypes.
    scale = tf.cast(num_positions, features.dtype)
    return tf.matmul(features, features, transpose_a=True) / scale


def compute_content_cost(a_C, a_G):
    """Return the mean squared difference between two activation tensors.

    Args:
        a_C: Activations of the content image.
        a_G: Activations of the generated image, same shape as ``a_C``.

    Returns:
        Scalar tensor: the mean over all elements of ``(a_C - a_G) ** 2``.
    """
    residual = a_C - a_G
    squared_error = tf.square(residual)
    return tf.reduce_mean(squared_error)


def compute_style_cost(a_S, a_G):
    """Average the Gram-matrix mismatch between style and generated features.

    Args:
        a_S: Style-image activations: either a list/tuple with one tensor per
            layer, or a single tensor (treated as one layer).
        a_G: Generated-image activations in the same layout as ``a_S``.

    Returns:
        The mean, over layers, of the mean squared difference between the
        Gram matrices of the style and generated activations.

    Raises:
        ValueError: If no layers are given or the two inputs hold a different
            number of layers.
    """
    # A bare tensor is one layer's activations. Iterating it directly would
    # walk its leading (batch) axis and feed gram_matrix per-row slices
    # instead of the whole feature map, so wrap it in a one-element list.
    if not isinstance(a_S, (list, tuple)):
        a_S = [a_S]
    if not isinstance(a_G, (list, tuple)):
        a_G = [a_G]

    if not a_S:
        raise ValueError("compute_style_cost needs at least one style layer")
    if len(a_S) != len(a_G):
        # zip() would silently drop the extra layers while the average below
        # still divided by len(a_S).
        raise ValueError(
            f"Layer count mismatch: {len(a_S)} style vs {len(a_G)} generated"
        )

    layer_costs = (
        tf.reduce_mean(tf.square(gram_matrix(s) - gram_matrix(g)))
        for s, g in zip(a_S, a_G)
    )
    return sum(layer_costs) / len(a_S)


def total_cost(J_content, J_style, alpha=10, beta=40):
    """Blend the content and style costs into a single objective.

    Args:
        J_content: Content cost.
        J_style: Style cost.
        alpha: Weight applied to the content cost.
        beta: Weight applied to the style cost.

    Returns:
        ``alpha * J_content + beta * J_style``.
    """
    content_term = alpha * J_content
    style_term = beta * J_style
    return content_term + style_term


def preprocess(img):
    """Turn an image array into a normalised ``(1, 256, 256, 3)`` tensor.

    Args:
        img: Image data as an array that ``PIL.Image.fromarray`` accepts.

    Returns:
        A float32 tensor with a leading batch axis of size 1 and pixel values
        scaled by 1/255.

    Raises:
        ValueError: If ``img`` is None (for example, no image was uploaded).
    """
    if img is None:
        raise ValueError("preprocess() received no image; provide an image first.")

    # Force three channels: grayscale or RGBA input would otherwise produce a
    # tensor whose channel count VGG19 cannot consume.
    resized = Image.fromarray(img).convert("RGB").resize((256, 256))
    arr = np.expand_dims(np.array(resized) / 255.0, axis=0).astype(np.float32)
    return tf.convert_to_tensor(arr)


def style_transfer(content, style, steps):
    """Optimise an image so it keeps ``content``'s layout in ``style``'s look.

    The generated image starts as the preprocessed content image and is
    updated by Adam for ``steps`` iterations against the weighted sum of the
    content and style costs, then clipped back into ``[0, 1]`` after every
    update.

    Args:
        content: Content image as an array accepted by ``preprocess``.
        style: Style image as an array accepted by ``preprocess``.
        steps: Number of optimisation iterations; int or an integral float.

    Returns:
        The stylised 256x256 result as a ``PIL.Image.Image``.
    """
    # The UI slider may hand the value over as a float, which range() rejects.
    num_steps = int(steps)

    content_tensor = preprocess(content)
    style_tensor = preprocess(style)

    # The targets do not change during optimisation, so compute them once.
    a_C = vgg(content_tensor)
    a_S = vgg(style_tensor)

    generated_image = tf.Variable(content_tensor)
    opt = tf.keras.optimizers.Adam(learning_rate=0.01)

    for _ in range(num_steps):
        with tf.GradientTape() as tape:
            a_G = vgg(generated_image)
            # vgg returns a single tensor here; wrap it so the style cost
            # sees one layer rather than iterating over the batch axis.
            J_style = compute_style_cost([a_S], [a_G])
            J_content = compute_content_cost(a_C, a_G)
            J = total_cost(J_content, J_style, alpha=10, beta=40)

        grad = tape.gradient(J, generated_image)
        opt.apply_gradients([(grad, generated_image)])
        # Keep pixel values in the valid range after each update.
        generated_image.assign(tf.clip_by_value(generated_image, 0.0, 1.0))

    out_img = (generated_image[0].numpy() * 255).astype("uint8")
    return Image.fromarray(out_img)


# Input widgets, listed in the positional order style_transfer expects:
# content image, style image, iteration count.
_demo_inputs = [
    gr.Image(type="numpy", label="Content Image"),
    gr.Image(type="numpy", label="Style Image"),
    gr.Slider(50, 2000, value=1000, step=50, label="Number of Iterations"),
]

# style_transfer returns a PIL image, so the output widget is typed to match.
_demo_output = gr.Image(type="pil", label="Stylized Image")

demo = gr.Interface(fn=style_transfer, inputs=_demo_inputs, outputs=_demo_output)

# Start the web UI as soon as the module is executed.
demo.launch()