# CaesarAIShowCase / CaesarAINL / caesartrain.py
# CaesarCloudSync — Caesar ShowCase Start (commit 8ce55a3)
# -*- coding: utf-8 -*-
import os
import json
#import shutil
import pickle
import warnings
import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text as text
from pylab import rcParams
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelBinarizer
# Silence third-party warnings and TensorFlow's verbose INFO/WARNING logging
# so training output stays readable.
warnings.filterwarnings("ignore")
tf.get_logger().setLevel('ERROR')
class CaesarNLTrain:
    """Train a BERT-based intent classifier on pre-split intent dataframes.

    The classifier is a TF-Hub BERT encoder with a dropout + dense head,
    trained with categorical cross-entropy over one-hot intent labels.
    Artifacts written to disk:
      - caesarmodel/labelbinarizer.pkl  (fitted LabelBinarizer)
      - caesarmodel/caesarnl.h5         (trained Keras model)
      - caesartrainperformance/<history_filename>  (loss/accuracy plot)
    """

    @staticmethod
    def _plot_history(history, history_filename):
        """Plot training/validation loss and accuracy from a Keras History
        object and save the figure under caesartrainperformance/."""
        history_dict = history.history
        print(history_dict.keys())
        acc = history_dict['categorical_accuracy']
        val_acc = history_dict['val_categorical_accuracy']
        loss = history_dict['loss']
        val_loss = history_dict['val_loss']
        epochs = range(1, len(acc) + 1)

        fig = plt.figure(figsize=(10, 8))
        fig.tight_layout()

        plt.subplot(2, 1, 1)
        # red = training curve, blue = validation curve (both subplots)
        plt.plot(epochs, loss, 'r', label='Training loss')
        plt.plot(epochs, val_loss, 'b', label='Validation loss')
        plt.title('Training and validation loss')
        plt.grid(True)
        plt.ylabel('Loss')
        plt.legend()

        plt.subplot(2, 1, 2)
        plt.plot(epochs, acc, 'r', label='Training acc')
        plt.plot(epochs, val_acc, 'b', label='Validation acc')
        plt.title('Training and validation accuracy')
        plt.grid(True)
        plt.xlabel('Epochs')
        plt.ylabel('Accuracy')
        plt.legend(loc='lower right')

        plt.savefig(f"caesartrainperformance/{history_filename}")

    @staticmethod
    def train(traindf, validdf, testdf, examples, history_filename="history.png"):
        """Fine-tune a small BERT model to classify the "intent" column.

        Args:
            traindf, validdf, testdf: pandas DataFrames with an "intent"
                label column; the remaining column(s) are the text features.
            examples: list of example sentences classified after training
                for a qualitative spot-check.
            history_filename: file name (inside caesartrainperformance/)
                for the saved loss/accuracy plot.
        """
        intent_label_output_size = len(pd.unique(traindf["intent"]))

        trainfeatures = traindf.copy()
        trainlabels = trainfeatures.pop("intent")
        trainfeatures = trainfeatures.values

        # One-hot-encode the class labels.
        binarizer = LabelBinarizer()
        trainlabels = binarizer.fit_transform(trainlabels.values)

        # Preprocess test and validation data the same way as training data.
        testfeatures = testdf.copy()
        testlabels = testfeatures.pop("intent")
        validfeatures = validdf.copy()
        validlabels = validfeatures.pop("intent")
        testfeatures = testfeatures.values
        validfeatures = validfeatures.values
        testlabels = binarizer.transform(testlabels.values)
        validlabels = binarizer.transform(validlabels.values)

        # Persist the binarizer so inference can map logits back to intent
        # names. (Fix: use a context manager — the original leaked the handle.)
        with open('caesarmodel/labelbinarizer.pkl', 'wb') as pkl_file:
            pickle.dump(binarizer, pkl_file)

        # Resolve the TF-Hub handles for the chosen BERT variant.
        bert_model_name = 'small_bert/bert_en_uncased_L-8_H-512_A-8'
        with open("caesarberthubmodels/bert_to_handle.json") as f:
            map_name_to_handle = json.load(f)
        with open("caesarberthubmodels/bert_to_preprocess.json") as f:
            map_model_to_preprocess = json.load(f)
        tfhub_handle_encoder = map_name_to_handle[bert_model_name]
        tfhub_handle_preprocess = map_model_to_preprocess[bert_model_name]
        print(f'BERT model selected : {tfhub_handle_encoder}')
        print(f'Preprocess model auto-selected: {tfhub_handle_preprocess}')

        def build_classifier_model():
            """Assemble preprocessing -> BERT encoder -> dropout -> dense head."""
            text_input = tf.keras.layers.Input(shape=(), dtype=tf.string, name='text')
            preprocessing_layer = hub.KerasLayer(tfhub_handle_preprocess, name='preprocessing')
            encoder_inputs = preprocessing_layer(text_input)
            encoder = hub.KerasLayer(tfhub_handle_encoder, trainable=True, name='BERT_encoder')
            outputs = encoder(encoder_inputs)
            # pooled_output: one vector per input sentence.
            net = outputs['pooled_output']
            net = tf.keras.layers.Dropout(0.1)(net)
            # No activation: raw logits, paired with from_logits=True below.
            net = tf.keras.layers.Dense(intent_label_output_size, activation=None,
                                        name='classifier')(net)
            return tf.keras.Model(text_input, net)

        # (The original loaded the preprocess + encoder hub models a second
        # time just for a throwaway smoke test; that redundant load is removed.)
        classifier_model = build_classifier_model()

        # Sanity check on an untrained model — the output is meaningless,
        # it only verifies that the graph runs end-to-end.
        bert_raw_result = classifier_model(tf.constant(trainfeatures[0]))
        print(tf.keras.activations.softmax(bert_raw_result))
        classifier_model.summary()

        # Multi-class problem with logit outputs -> categorical cross-entropy.
        loss = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
        metrics = tf.metrics.CategoricalAccuracy()
        epochs = 5
        optimizer = tf.keras.optimizers.Adam(1e-5)
        classifier_model.compile(optimizer=optimizer,
                                 loss=loss,
                                 metrics=metrics)

        print(f'Training model with {tfhub_handle_encoder}')
        history = classifier_model.fit(x=trainfeatures, y=trainlabels,
                                       validation_data=(validfeatures, validlabels),
                                       batch_size=32,
                                       epochs=epochs)
        # NOTE(review): HDF5 may not fully serialize hub.KerasLayer models —
        # consider the TF SavedModel format instead; confirm before relying on it.
        classifier_model.save("caesarmodel/caesarnl.h5")

        # Evaluate on the held-out test split.
        loss, accuracy = classifier_model.evaluate(testfeatures, testlabels)
        print(f'Loss: {loss}')
        print(f'Accuracy: {accuracy}')

        CaesarNLTrain._plot_history(history, history_filename)

        def print_my_examples(inputs, results):
            """Print each example sentence next to its predicted intent."""
            result_for_printing = \
                [f'input: {inputs[i]:<30} : estimated intent: {results[i]}'
                 for i in range(len(inputs))]
            print(*result_for_printing, sep='\n')
            print()

        # Classify the caller-supplied examples and map probabilities back
        # to intent names via the fitted binarizer.
        results = tf.nn.softmax(classifier_model(tf.constant(examples)))
        intents = binarizer.inverse_transform(results.numpy())
        print_my_examples(examples, intents)
if __name__ == "__main__":
    # Example sentences for a qualitative spot-check after training.
    examples = [
        'play a song from U2',  # this is the same sentence tried earlier
        'Will it rain tomorrow',
        'I like to hear greatist hits from beastie boys',
        'I like to book a table for 3 persons',
        '5 stars for machines like me',
    ]

    datafolder = "intentdata/"
    # Load the pre-split train/validation/test CSVs into dataframes.
    frames = {
        split: pd.read_csv(datafolder + split + ".csv")
        for split in ("train", "valid", "test")
    }

    CaesarNLTrain.train(frames["train"], frames["valid"], frames["test"], examples)