File size: 3,475 Bytes
9f1467e
 
 
 
04db61e
 
 
9f1467e
 
 
04db61e
 
9f1467e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
04db61e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9f1467e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
from keras.models import load_model
import numpy as np

# Load the saved model
# model = load_model('model.h5')

# predicted_image = model.predict(np.expand_dims(input_image, axis=0))


import tensorflow as tf
from PIL import Image
import numpy as np
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Fetch the CIFAR-10 image/label splits (50k train, 10k test).
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Rescale uint8 pixels into [0, 1] floats so they match the network's
# expected input range.
x_train, x_test = (split.astype('float32') / 255.0 for split in (x_train, x_test))

# Convert the integer class labels into 10-way one-hot vectors for
# categorical cross-entropy.
y_train, y_test = (to_categorical(split, num_classes=10) for split in (y_train, y_test))






# predicted = model.predict(x_test)

# np.argmax(predicted, axis = 1)


# AlexNet-style convolutional network adapted to 32x32 CIFAR-10 inputs:
# five conv layers (three interleaved max-pools), then two dropout-regularized
# dense layers and a 10-way softmax head.
model = Sequential()
# Conv stage 1: large receptive field on the raw image.
model.add(Conv2D(96, (11, 11), strides=(1, 1), activation='relu',
                 input_shape=(32, 32, 3)))
model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))
# Conv stage 2.
model.add(Conv2D(256, (5, 5), padding='same', activation='relu'))
model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))
# Conv stages 3-5: stacked 3x3 convolutions, pool only after the last.
model.add(Conv2D(384, (3, 3), padding='same', activation='relu'))
model.add(Conv2D(384, (3, 3), padding='same', activation='relu'))
model.add(Conv2D(256, (3, 3), padding='same', activation='relu'))
model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))
# Classifier head: flatten, two 4096-unit dense layers with dropout,
# softmax over the 10 CIFAR-10 classes.
model.add(Flatten())
model.add(Dense(4096, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(4096, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(10, activation='softmax'))

# Compile with Adam at a reduced learning rate for more stable convergence
# on this relatively deep network.
model.compile(optimizer=Adam(learning_rate=0.0001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Light geometric augmentation to reduce overfitting: small rotations,
# sub-10% shifts, and horizontal mirroring.
augmenter = ImageDataGenerator(
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
)
augmenter.fit(x_train)

# Train on augmented batches, validating against the untouched test split.
model.fit(augmenter.flow(x_train, y_train, batch_size=128), epochs=25, validation_data=(x_test, y_test))


# Human-readable names for the ten CIFAR-10 class indices, in label order.
classes = dict(enumerate([
    'Airplane',
    'Automobile',
    'Bird',
    'Cat',
    'Deer',
    'Dog',
    'Frog',
    'Horse',
    'Ship',
    'Truck',
]))

def prediction(input_img):
    """Classify a numpy image array with the trained CIFAR-10 model.

    Args:
        input_img: HxWx3 numpy array (any size); values castable to uint8.

    Returns:
        The predicted class name (one of the values of ``classes``).
    """
    # BUG FIX: the original called `transforms.Compose(...)` but torchvision
    # was never imported anywhere in this file, so every call raised
    # NameError. The same resize-shorter-side-to-32 + center-crop + scale
    # pipeline is reproduced here with PIL and numpy, which ARE imported.
    # (The original also applied `.T` to a CHW tensor, swapping H and W
    # relative to the HWC layout the model was trained on; this version
    # keeps the natural HWC orientation.)
    pil_image = Image.fromarray(input_img.astype('uint8'))

    # Resize so the shorter side is 32 px, preserving aspect ratio
    # (mirrors torchvision's Resize(32); interpolation may differ slightly).
    width, height = pil_image.size
    scale = 32.0 / min(width, height)
    pil_image = pil_image.resize((max(32, round(width * scale)),
                                  max(32, round(height * scale))))

    # Center-crop to exactly 32x32 (mirrors CenterCrop(32)).
    width, height = pil_image.size
    left = (width - 32) // 2
    top = (height - 32) // 2
    pil_image = pil_image.crop((left, top, left + 32, top + 32))

    # Scale to [0, 1] floats, matching the training normalization,
    # and add the leading batch dimension the model expects.
    transformed_image = np.asarray(pil_image).astype('float32') / 255.0
    input_image = np.expand_dims(transformed_image, axis=0)

    output = model.predict(input_image)
    return classes[int(np.argmax(output))]
# BUG FIX: `gr` was used below but gradio was never imported, so the script
# crashed with NameError before the demo could start. Import it here, at the
# point of use, since the rest of the file does not need it.
import gradio as gr

# Minimal web UI: upload an image, get back the predicted CIFAR-10 class name.
demo = gr.Interface(prediction, gr.Image(), "text")
demo.launch()