import tensorflow as tf


def load_and_prep_image(filename, img_shape=224, scale=True):
    """
    Reads in an image from filename, turns it into a tensor and reshapes it into
    (img_shape, img_shape, 3).

    Parameters
    ----------
    filename (str): string filename of target image
    img_shape (int): size to resize target image to, default 224
    scale (bool): whether to scale pixel values to range(0, 1), default True
    """
    # Read in the image file
    img = tf.io.read_file(filename)
    # Decode the file into a tensor
    img = tf.image.decode_jpeg(img)
    # Resize the image to (img_shape, img_shape)
    img = tf.image.resize(img, [img_shape, img_shape])
    if scale:
        # Rescale the image (get all pixel values between 0 and 1)
        return img / 255.
    else:
        return img
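# Example usage (a sketch -- "steak.jpeg" is a hypothetical image path, not a
# file shipped with these helpers):
# img = load_and_prep_image("steak.jpeg")
# img.shape  # -> TensorShape([224, 224, 3])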
import itertools
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import confusion_matrix


def make_confusion_matrix(y_true, y_pred, classes=None, figsize=(10, 10), text_size=15, norm=False, savefig=False):
    """Makes a labelled confusion matrix comparing predictions and ground truth labels.

    If classes is passed, the confusion matrix will be labelled; if not, integer class values
    will be used.

    Args:
        y_true: Array of truth labels (must be same shape as y_pred).
        y_pred: Array of predicted labels (must be same shape as y_true).
        classes: Array of class labels (e.g. string form). If `None`, integer labels are used.
        figsize: Size of output figure (default=(10, 10)).
        text_size: Size of output figure text (default=15).
        norm: normalize values or not (default=False).
        savefig: save confusion matrix to file (default=False).

    Returns:
        A labelled confusion matrix plot comparing y_true and y_pred.

    Example usage:
        make_confusion_matrix(y_true=test_labels, # ground truth test labels
                              y_pred=y_preds, # predicted labels
                              classes=class_names, # array of class label names
                              figsize=(15, 15),
                              text_size=10)
    """
    # Create the confusion matrix and a row-normalized version of it
    cm = confusion_matrix(y_true, y_pred)
    cm_norm = cm.astype("float") / cm.sum(axis=1)[:, np.newaxis]
    n_classes = cm.shape[0]

    # Plot the matrix
    fig, ax = plt.subplots(figsize=figsize)
    cax = ax.matshow(cm, cmap=plt.cm.Blues)
    fig.colorbar(cax)

    # Label the axes with class names if provided, otherwise integer labels
    # (use `is not None` so NumPy arrays of class names don't raise an error)
    if classes is not None:
        labels = classes
    else:
        labels = np.arange(n_classes)

    ax.set(title="Confusion Matrix",
           xlabel="Predicted label",
           ylabel="True label",
           xticks=np.arange(n_classes),
           yticks=np.arange(n_classes),
           xticklabels=labels,
           yticklabels=labels)

    # Make the x-axis labels appear on the bottom
    ax.xaxis.set_label_position("bottom")
    ax.xaxis.tick_bottom()

    # Set the threshold that flips cell text from black to white
    threshold = (cm.max() + cm.min()) / 2.

    # Write the count (and optionally the percentage) in each cell
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        if norm:
            plt.text(j, i, f"{cm[i, j]} ({cm_norm[i, j]*100:.1f}%)",
                     horizontalalignment="center",
                     color="white" if cm[i, j] > threshold else "black",
                     size=text_size)
        else:
            plt.text(j, i, f"{cm[i, j]}",
                     horizontalalignment="center",
                     color="white" if cm[i, j] > threshold else "black",
                     size=text_size)

    # Save the figure to the current working directory
    if savefig:
        fig.savefig("confusion_matrix.png")
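# Note: make_confusion_matrix expects label indices, not prediction
# probabilities. A sketch of preparing y_preds from a trained model (`model`,
# `test_data`, `test_labels` and `class_names` are assumed to exist elsewhere):
# pred_probs = model.predict(test_data)
# y_preds = pred_probs.argmax(axis=1)  # probabilities -> label indices
# make_confusion_matrix(y_true=test_labels, y_pred=y_preds, classes=class_names)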
def pred_and_plot(model, filename, class_names):
    """
    Imports an image located at filename, makes a prediction on it with
    a trained model and plots the image with the predicted class as the title.
    """
    # Import the target image and preprocess it
    img = load_and_prep_image(filename)

    # Make a prediction (the model expects a batch, so add a batch dimension)
    pred = model.predict(tf.expand_dims(img, axis=0))

    # Get the predicted class
    if len(pred[0]) > 1:  # multi-class output: take the index of the max probability
        pred_class = class_names[pred.argmax()]
    else:  # binary output: round the single prediction probability
        pred_class = class_names[int(tf.round(pred)[0][0])]

    # Plot the image with the predicted class as the title
    plt.imshow(img)
    plt.title(f"Prediction: {pred_class}")
    plt.axis(False)
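# Example usage (a sketch -- `model` and `class_names` come from your own
# training code, and "pizza.jpeg" is a hypothetical image path):
# pred_and_plot(model=model, filename="pizza.jpeg", class_names=class_names)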
import datetime


def create_tensorboard_callback(dir_name, experiment_name):
    """
    Creates a TensorBoard callback instance to store log files.

    Stores log files with the filepath:
        "dir_name/experiment_name/current_datetime/"

    Args:
        dir_name: target directory to store TensorBoard log files
        experiment_name: name of experiment directory (e.g. efficientnet_model_1)
    """
    log_dir = dir_name + "/" + experiment_name + "/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    tensorboard_callback = tf.keras.callbacks.TensorBoard(
        log_dir=log_dir
    )
    print(f"Saving TensorBoard log files to: {log_dir}")
    return tensorboard_callback
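# Example usage (a sketch -- `model`, `train_data` and `test_data` are assumed
# to exist, and the "tensorflow_hub" directory name is arbitrary):
# tensorboard_cb = create_tensorboard_callback(dir_name="tensorflow_hub",
#                                              experiment_name="efficientnet_model_1")
# model.fit(train_data, epochs=5, validation_data=test_data,
#           callbacks=[tensorboard_cb])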
def plot_loss_curves(history):
    """
    Plots separate loss curves for training and validation metrics.

    Args:
        history: TensorFlow model History object (see: https://www.tensorflow.org/api_docs/python/tf/keras/callbacks/History)
    """
    loss = history.history['loss']
    val_loss = history.history['val_loss']

    accuracy = history.history['accuracy']
    val_accuracy = history.history['val_accuracy']

    # Number of epochs the model trained for
    epochs = range(len(history.history['loss']))

    # Plot loss
    plt.plot(epochs, loss, label='training_loss')
    plt.plot(epochs, val_loss, label='val_loss')
    plt.title('Loss')
    plt.xlabel('Epochs')
    plt.legend()

    # Plot accuracy on a separate figure
    plt.figure()
    plt.plot(epochs, accuracy, label='training_accuracy')
    plt.plot(epochs, val_accuracy, label='val_accuracy')
    plt.title('Accuracy')
    plt.xlabel('Epochs')
    plt.legend()
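# Example usage (a sketch -- `history_1` is the History object returned by a
# model.fit() call in your own code):
# plot_loss_curves(history_1)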
def compare_historys(original_history, new_history, initial_epochs=5):
    """
    Compares two TensorFlow model History objects.

    Args:
        original_history: History object from original model (before new_history)
        new_history: History object from continued model training (after original_history)
        initial_epochs: Number of epochs in original_history (new_history plot starts from here)
    """
    # Get original history measurements
    acc = original_history.history["accuracy"]
    loss = original_history.history["loss"]

    val_acc = original_history.history["val_accuracy"]
    val_loss = original_history.history["val_loss"]

    # Combine original history metrics with new history metrics
    total_acc = acc + new_history.history["accuracy"]
    total_loss = loss + new_history.history["loss"]

    total_val_acc = val_acc + new_history.history["val_accuracy"]
    total_val_loss = val_loss + new_history.history["val_loss"]

    # Make plots
    plt.figure(figsize=(8, 8))
    plt.subplot(2, 1, 1)
    plt.plot(total_acc, label='Training Accuracy')
    plt.plot(total_val_acc, label='Validation Accuracy')
    # Mark where the original training ended and fine-tuning began
    plt.plot([initial_epochs-1, initial_epochs-1],
             plt.ylim(), label='Start Fine Tuning')
    plt.legend(loc='lower right')
    plt.title('Training and Validation Accuracy')

    plt.subplot(2, 1, 2)
    plt.plot(total_loss, label='Training Loss')
    plt.plot(total_val_loss, label='Validation Loss')
    plt.plot([initial_epochs-1, initial_epochs-1],
             plt.ylim(), label='Start Fine Tuning')
    plt.legend(loc='upper right')
    plt.title('Training and Validation Loss')
    plt.xlabel('epoch')
    plt.show()
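# Example usage (a sketch -- the two History objects come from an initial fit
# and a continued, fine-tuning fit of the same model in your own code):
# compare_historys(original_history=history_1,
#                  new_history=history_1_fine_tuned,
#                  initial_epochs=5)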
import zipfile


def unzip_data(filename):
    """
    Unzips filename into the current working directory.

    Args:
        filename (str): a filepath to a target zip folder to be unzipped.
    """
    with zipfile.ZipFile(filename, "r") as zip_ref:
        zip_ref.extractall()
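# Example usage (a sketch -- "10_food_classes_10_percent.zip" is a hypothetical
# archive name):
# unzip_data("10_food_classes_10_percent.zip")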
import os


def walk_through_dir(dir_path):
    """
    Walks through dir_path returning its contents.

    Args:
        dir_path (str): target directory

    Returns:
        A print out of:
            number of subdirectories in dir_path
            number of images (files) in each subdirectory
            name of each subdirectory
    """
    for dirpath, dirnames, filenames in os.walk(dir_path):
        print(f"There are {len(dirnames)} directories and {len(filenames)} images in '{dirpath}'.")
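# Example usage (a sketch -- "10_food_classes_10_percent" is a hypothetical
# directory, e.g. one produced by unzip_data above):
# walk_through_dir("10_food_classes_10_percent")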
from sklearn.metrics import accuracy_score, precision_recall_fscore_support


def calculate_results(y_true, y_pred):
    """
    Calculates model accuracy, precision, recall and f1 score of a binary classification model.

    Args:
        y_true: true labels in the form of a 1D array
        y_pred: predicted labels in the form of a 1D array

    Returns a dictionary of accuracy, precision, recall, f1-score.
    """
    # Calculate model accuracy (as a percentage)
    model_accuracy = accuracy_score(y_true, y_pred) * 100
    # Calculate precision, recall and f1 score using a "weighted" average
    model_precision, model_recall, model_f1, _ = precision_recall_fscore_support(y_true, y_pred, average="weighted")
    model_results = {"accuracy": model_accuracy,
                     "precision": model_precision,
                     "recall": model_recall,
                     "f1": model_f1}
    return model_results
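# Example usage (a sketch with toy labels; note accuracy is on a 0-100 scale
# while the other metrics are on a 0-1 scale):
# calculate_results(y_true=[0, 1, 1, 0], y_pred=[0, 1, 0, 0])
# -> {'accuracy': 75.0, 'precision': 0.83..., 'recall': 0.75, 'f1': 0.73...}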