GboyeStack commited on
Commit
483f32c
·
verified ·
1 Parent(s): 2f2f41c

Delete helper_functions.py

Browse files
Files changed (1) hide show
  1. helper_functions.py +0 -441
helper_functions.py DELETED
@@ -1,441 +0,0 @@
1
- ### We create a bunch of helpful functions throughout the course.
2
- ### Storing them here so they're easily accessible.
3
-
4
- import tensorflow as tf
5
-
6
- # Create a function to import an image and resize it to be able to be used with our model
7
def load_and_prep_image(filename, img_shape=224, scale=True):
    """
    Reads in a JPEG image from filename, turns it into a tensor and resizes it
    to (img_shape, img_shape, 3).

    NOTE: a second function with this same name is defined further down in this
    file; being defined later, it is the one bound to the name after import.

    Parameters
    ----------
    filename (str): string filename of target image
    img_shape (int): size to resize target image to, default 224
    scale (bool): whether to scale pixel values to range(0, 1), default True

    Returns
    -------
    Image tensor of shape (img_shape, img_shape, 3); divided by 255 when
    `scale` is True, otherwise left in the resized 0-255 value range.
    """
    # Read in the raw file bytes
    img = tf.io.read_file(filename)
    # Decode into a tensor; channels=3 forces RGB so grayscale/CMYK JPEGs
    # still produce the 3-channel shape promised above
    img = tf.image.decode_jpeg(img, channels=3)
    # Resize the image
    img = tf.image.resize(img, [img_shape, img_shape])
    if scale:
        # Rescale the image (get all values between 0 and 1)
        return img / 255.
    return img
29
-
30
- # Note: The following confusion matrix code is a remix of Scikit-Learn's
31
- # plot_confusion_matrix function - https://scikit-learn.org/stable/modules/generated/sklearn.metrics.plot_confusion_matrix.html
32
- import itertools
33
- import matplotlib.pyplot as plt
34
- import numpy as np
35
- from sklearn.metrics import confusion_matrix
36
-
37
- # Our function needs a different name to sklearn's plot_confusion_matrix
38
def make_confusion_matrix(y_true, y_pred, classes=None, figsize=(10, 10), text_size=15, norm=False, savefig=False):
    """Makes a labelled confusion matrix comparing predictions and ground truth labels.

    If classes is passed, confusion matrix will be labelled, if not, integer class values
    will be used.

    Args:
      y_true: Array of truth labels (must be same shape as y_pred).
      y_pred: Array of predicted labels (must be same shape as y_true).
      classes: Array of class labels (e.g. string form). If `None` or empty, integer labels are used.
      figsize: Size of output figure (default=(10, 10)).
      text_size: Size of output figure text (default=15).
      norm: also show row-normalized percentages in each cell (default=False).
      savefig: save confusion matrix to "confusion_matrix.png" in the current
        working directory (default=False).

    Returns:
      None. The confusion matrix is drawn on a new matplotlib figure.

    Example usage:
      make_confusion_matrix(y_true=test_labels, # ground truth test labels
                            y_pred=y_preds, # predicted labels
                            classes=class_names, # array of class label names
                            figsize=(15, 15),
                            text_size=10)
    """
    # Create the confusion matrix
    cm = confusion_matrix(y_true, y_pred)
    n_classes = cm.shape[0]  # find the number of classes we're dealing with

    cm_norm = None
    if norm:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        # A class with no true samples has an all-zero row; divide by 1 there so
        # the cell shows 0.0% instead of nan (and no divide-by-zero warning)
        row_totals[row_totals == 0] = 1
        cm_norm = cm.astype("float") / row_totals

    # Plot the figure and make it pretty
    fig, ax = plt.subplots(figsize=figsize)
    cax = ax.matshow(cm, cmap=plt.cm.Blues)  # darker cells hold larger counts
    fig.colorbar(cax)

    # Is there a list of classes? Use an explicit None/length check because
    # `if classes:` raises ValueError when classes is a NumPy array
    if classes is not None and len(classes) > 0:
        labels = classes
    else:
        labels = np.arange(n_classes)

    # Label the axes
    ax.set(title="Confusion Matrix",
           xlabel="Predicted label",
           ylabel="True label",
           xticks=np.arange(n_classes),  # create enough axis slots for each class
           yticks=np.arange(n_classes),
           xticklabels=labels,  # axes will be labelled with class names (if they exist) or ints
           yticklabels=labels)

    # Make x-axis labels appear on bottom
    ax.xaxis.set_label_position("bottom")
    ax.xaxis.tick_bottom()

    plt.xticks(rotation=70, fontsize=text_size)
    plt.yticks(fontsize=text_size)

    # Cells above the midpoint of the count range get white text, the rest black
    threshold = (cm.max() + cm.min()) / 2.

    # Plot the text on each cell
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        if norm:
            cell_text = f"{cm[i, j]} ({cm_norm[i, j]*100:.1f}%)"
        else:
            cell_text = f"{cm[i, j]}"
        plt.text(j, i, cell_text,
                 horizontalalignment="center",
                 color="white" if cm[i, j] > threshold else "black",
                 size=text_size)

    # Save the figure to the current working directory
    if savefig:
        fig.savefig("confusion_matrix.png")
114
-
115
- # Make a function to predict on images and plot them (works with multi-class)
116
def pred_and_plot(model, filename, class_names):
    """
    Loads the image at filename, predicts its class with a trained model and
    shows the image with the predicted class name as the plot title.

    Args:
      model: trained model exposing a `predict` method.
      filename: path of the image to predict on.
      class_names: sequence of class names indexed by predicted class.
    """
    # Load and preprocess the image with the helper's default settings
    image = load_and_prep_image(filename)

    # The model is given a batch of one image
    batch = tf.expand_dims(image, axis=0)
    probabilities = model.predict(batch)

    # More than one output value -> multi-class, pick the highest scoring index;
    # a single output value -> round it to get the class index
    is_multi_class = len(probabilities[0]) > 1
    if is_multi_class:
        class_index = probabilities.argmax()
    else:
        class_index = int(tf.round(probabilities)[0][0])
    predicted_name = class_names[class_index]

    # Show the image titled with the prediction, without axes
    plt.imshow(image)
    plt.title(f"Prediction: {predicted_name}")
    plt.axis(False)
137
-
138
- import datetime
139
-
140
def create_tensorboard_callback(dir_name, experiment_name):
    """
    Builds a TensorBoard callback instance that stores its log files under:
      "dir_name/experiment_name/current_datetime/"

    Args:
      dir_name: target directory to store TensorBoard log files
      experiment_name: name of experiment directory (e.g. efficientnet_model_1)

    Returns:
      The configured tf.keras.callbacks.TensorBoard callback.
    """
    # Timestamp the run so repeated experiments get separate log directories
    timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    log_dir = "/".join([dir_name, experiment_name, timestamp])

    callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir)
    print(f"Saving TensorBoard log files to: {log_dir}")
    return callback
157
-
158
- # Plot the validation and training data separately
159
- import matplotlib.pyplot as plt
160
-
161
def plot_loss_curves(history):
    """
    Plots training vs. validation loss, then training vs. validation accuracy
    on a separate figure.

    Args:
      history: TensorFlow model History object (see: https://www.tensorflow.org/api_docs/python/tf/keras/callbacks/History)
        whose `history` dict has 'loss', 'val_loss', 'accuracy' and 'val_accuracy'.
    """
    recorded = history.history
    train_loss, valid_loss = recorded['loss'], recorded['val_loss']
    train_acc, valid_acc = recorded['accuracy'], recorded['val_accuracy']

    # One x position per recorded epoch
    epoch_axis = range(len(recorded['loss']))

    panels = (
        ('Loss', (('training_loss', train_loss), ('val_loss', valid_loss))),
        ('Accuracy', (('training_accuracy', train_acc), ('val_accuracy', valid_acc))),
    )
    for position, (title, curves) in enumerate(panels):
        # The loss panel draws on the current figure; only the accuracy
        # panel opens a new one
        if position > 0:
            plt.figure()
        for curve_label, values in curves:
            plt.plot(epoch_axis, values, label=curve_label)
        plt.title(title)
        plt.xlabel('Epochs')
        plt.legend()
190
-
191
def compare_historys(original_history, new_history, initial_epochs=5):
    """
    Plots the metrics of two TensorFlow model History objects back to back.

    Args:
      original_history: History object from original model (before new_history)
      new_history: History object from continued model training (after original_history)
      initial_epochs: Number of epochs in original_history (a vertical marker
        line is drawn at initial_epochs - 1)
    """
    metric_keys = ("accuracy", "loss", "val_accuracy", "val_loss")

    # Measurements from the original run, then the new run appended to each
    first_run = {key: original_history.history[key] for key in metric_keys}
    combined = {key: first_run[key] + new_history.history[key] for key in metric_keys}

    # x coordinates of the vertical "fine tuning starts here" marker
    marker_x = [initial_epochs - 1, initial_epochs - 1]

    # (subplot index, training key, validation key, training label,
    #  validation label, legend location, title)
    panels = (
        (1, "accuracy", "val_accuracy", 'Training Accuracy', 'Validation Accuracy',
         'lower right', 'Training and Validation Accuracy'),
        (2, "loss", "val_loss", 'Training Loss', 'Validation Loss',
         'upper right', 'Training and Validation Loss'),
    )

    plt.figure(figsize=(8, 8))
    for index, train_key, val_key, train_label, val_label, legend_loc, title in panels:
        plt.subplot(2, 1, index)
        plt.plot(combined[train_key], label=train_label)
        plt.plot(combined[val_key], label=val_label)
        # Marker spans the current y-limits of the subplot
        plt.plot(marker_x, plt.ylim(), label='Start Fine Tuning')
        plt.legend(loc=legend_loc)
        plt.title(title)
    plt.xlabel('epoch')
    plt.show()
234
-
235
- # Create function to unzip a zipfile into current working directory
236
- # (since we're going to be downloading and unzipping a few files)
237
- import zipfile
238
-
239
def unzip_data(filename):
    """
    Unzips filename into the current working directory.

    Args:
      filename (str): a filepath to a target zip folder to be unzipped.
    """
    # The context manager closes the archive even if extraction raises
    with zipfile.ZipFile(filename, "r") as zip_ref:
        zip_ref.extractall()
249
-
250
- # Walk through an image classification directory and find out how many files (images)
251
- # are in each subdirectory.
252
- import os
253
-
254
def walk_through_dir(dir_path):
    """
    Walks through dir_path and prints a summary line for every directory found.

    Args:
      dir_path (str): target directory

    Returns:
      None. For dir_path and each directory below it, prints:
        number of subdirectories it contains
        number of files (reported as images) it contains
        its path
    """
    for current_dir, child_dirs, child_files in os.walk(dir_path):
        print(f"There are {len(child_dirs)} directories and {len(child_files)} images in '{current_dir}'.")
269
-
270
- # Function to evaluate: accuracy, precision, recall, f1-score
271
- from sklearn.metrics import accuracy_score, precision_recall_fscore_support
272
-
273
def calculate_results(y_true, y_pred):
    """
    Computes accuracy, precision, recall and f1 score for a set of predictions.

    Precision, recall and f1 use the "weighted" average of
    precision_recall_fscore_support; accuracy is reported as a percentage
    (accuracy_score multiplied by 100).

    Args:
      y_true: true labels in the form of a 1D array
      y_pred: predicted labels in the form of a 1D array

    Returns a dictionary with the keys "accuracy", "precision", "recall", "f1".
    """
    accuracy_percent = accuracy_score(y_true, y_pred) * 100

    # The fourth element of the returned tuple is not used
    weighted_scores = precision_recall_fscore_support(y_true, y_pred, average="weighted")
    precision, recall, f1 = weighted_scores[0], weighted_scores[1], weighted_scores[2]

    return {
        "accuracy": accuracy_percent,
        "precision": precision,
        "recall": recall,
        "f1": f1,
    }
292
-
293
- #Create Model Function to create a model from url
294
def create_model(model_url, num_classes=10, image_shape=(224, 224)):
    '''
    Takes a TensorFlow Hub URL and creates a Keras Sequential model with it.

    Args:
      model_url (str): A TensorFlow Hub feature extraction URL.
      num_classes (int): Number of output neurons, i.e. number of target classes, default 10.
      image_shape (tuple): (height, width) of the input images, default (224, 224).
        Replaces the module-level IMAGE_SHAPE constant the function used to
        read, which is not defined anywhere in this file.

    Returns: uncompiled model consisting of the frozen feature extractor
      followed by a softmax output layer.
    '''
    # NOTE(review): `hub` is not imported anywhere in this file; the module
    # needs `import tensorflow_hub as hub` at the top for this call to work.
    feature_extractor_layer = hub.KerasLayer(model_url,
                                             trainable=False,  # keep the pretrained weights frozen
                                             name='feature_extraction_layer',
                                             input_shape=tuple(image_shape) + (3,))
    # `layers` was never imported either; go through the existing `tf` import
    output_layer = tf.keras.layers.Dense(num_classes, activation='softmax', name='output_layer')
    model = tf.keras.Sequential([feature_extractor_layer, output_layer])

    return model
312
-
313
-
314
-
315
- import matplotlib.pyplot as plt
316
- import matplotlib.image as mping
317
- import os
318
- import random
319
def view_argumented(target_clas, target_dir, data_augmentation=None):
    '''
    Shows a random image from the training data next to an augmented copy of it.

    Previously both arguments were overwritten inside the function (a class was
    drawn from an undefined `percent_train` global and the directory was a
    hard-coded Colab path); they are now actually used.

    Args:
      target_clas: sequence of class names to pick from at random (e.g. the
        `class_names` of the training dataset), or a single class name string.
      target_dir (str): training data directory containing one sub-directory
        per class name.
      data_augmentation: callable invoked as `data_augmentation(img, training=True)`.
        When omitted, a module-level `data_augmentation` is looked up, matching
        the global name the function used to rely on.

    Returns:
      Tuple (original_plot, augmented_plot) of the two `plt.imshow` results.

    Raises:
      NameError: if no data_augmentation callable is passed in or defined at
        module level.
    '''
    if data_augmentation is None:
        data_augmentation = globals().get("data_augmentation")
    if data_augmentation is None:
        raise NameError("name 'data_augmentation' is not defined; pass it as the data_augmentation argument")

    # A plain string is treated as the class itself, anything else as the pool
    # of class names to draw from
    if isinstance(target_clas, str):
        target_class = target_clas
    else:
        target_class = random.choice(target_clas)

    class_dir = os.path.join(target_dir, target_class)
    random_image = random.choice(os.listdir(class_dir))
    random_image_path = os.path.join(class_dir, random_image)

    # Read in and show the original image
    img = mping.imread(random_image_path)
    fin = plt.imshow(img)
    plt.title(f'Orginal Target Image from class {target_class} ')
    plt.axis(False)

    # Plot the augmented image on its own figure
    augmented_img = data_augmentation(img, training=True)
    plt.figure()
    fin2 = plt.imshow(augmented_img / 255.)  # assumes the augmented output is in the 0-255 range - TODO confirm
    plt.title('AugumentedImage')
    return fin, fin2
342
-
343
- #Functionalise Model Checkpoint
344
- import datetime
345
-
346
def create_model_checkpoint(file_name):
    '''
    Creates a ModelCheckpoint callback that saves model weights every epoch.

    Args:
      file_name: directory name for the checkpoint; the checkpoint file inside
        it is named with the current timestamp plus a '.ckpt' suffix.

    Returns:
      The configured tf.keras.callbacks.ModelCheckpoint callback.
    '''
    filepathdir = file_name + '/' + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    filepath = filepathdir + '.ckpt'
    # The keyword is `save_weights_only`; the previous spelling
    # `save_weight_only` is not a ModelCheckpoint argument
    checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(filepath=filepath,
                                                             save_weights_only=True,
                                                             save_best_only=False,
                                                             save_freq='epoch',
                                                             verbose=1)
    return checkpoint_callback
362
-
363
- import tensorflow as tf
364
def load_and_prep_image(filename, img_shape=224, scale=True):
    """
    Reads in an image from filename, turns it into a tensor and resizes it to
    (img_shape, img_shape, 3).

    This definition comes after an earlier function of the same name in this
    file and therefore replaces it.

    Parameters
    ----------
    filename (str): string filename of target image
    img_shape (int): size to resize target image to, default 224
    scale (bool): whether to scale pixel values to range(0, 1), default True

    Returns
    -------
    Image tensor of shape (img_shape, img_shape, 3); divided by 255 when
    `scale` is True, otherwise left in the resized 0-255 value range.
    """
    # Read in the raw file bytes
    img = tf.io.read_file(filename)
    # Decode it into a tensor. channels=3 forces RGB for grayscale/RGBA inputs,
    # and expand_animations=False keeps the result 3-D (GIFs would otherwise
    # decode to a 4-D tensor) so tf.image.resize gets a tensor of known rank
    img = tf.io.decode_image(img, channels=3, expand_animations=False)
    # Resize the image
    img = tf.image.resize(img, [img_shape, img_shape])
    if scale:
        # Rescale the image (get all values between 0 and 1)
        return img / 255.
    return img
386
-
387
- #Preprocess Images for Tensorflow ds
388
def preprocess_img(image, label, img_shape=224):
    '''
    Resizes image to [img_shape, img_shape] and casts it to float32.
    Pixel values are not rescaled.

    Args:
      image: image tensor to resize
      label: label belonging to the image, passed through unchanged
      img_shape: target height and width, default 224

    Returns:
      Tuple of (resized float32 image, label).
    '''
    target_size = [img_shape, img_shape]
    resized = tf.image.resize(image, target_size)
    as_float32 = tf.cast(resized, tf.float32)
    return as_float32, label
401
-
402
- import os
403
def get_lines(filename):
    '''
    Reads a text file and returns its lines as a list.

    Args:
      filename: a string containing the target filepath

    Returns: A list of strings, one per line of the file (line endings kept).
    '''
    with open(filename, 'r') as text_file:
        lines = text_file.readlines()
    return lines
414
-
415
- #Splitting text into characters level
416
-
417
def split_chars(text):
    """Returns the characters of text joined by single spaces."""
    separator = ' '
    return separator.join(text)
419
-
420
-
421
-
422
def lr_schedule(epoch):
    """Learning Rate Schedule

    Starts from 1e-3 and reduces the rate once epoch exceeds 80, 120, 160
    and 180. Prints the chosen rate on every call.

    # Arguments
        epoch (int): The number of epochs

    # Returns
        lr (float32): learning rate
    """
    base_lr = 1e-3
    # (epoch boundary, multiplier) pairs, checked from the highest boundary down
    reductions = (
        (180, 0.5e-3),
        (160, 1e-3),
        (120, 1e-2),
        (80, 1e-1),
    )

    lr = base_lr
    for boundary, multiplier in reductions:
        if epoch > boundary:
            lr = base_lr * multiplier
            break

    print('Learning rate: ', lr)
    return lr