# CaesarAIShowCase / CaesarAINL / caesartrain.py
# CaesarCloudSync — Caesar ShowCase Start (commit 8ce55a3)
# -*- coding: utf-8 -*-
import os
import json
#import shutil
import pickle
import warnings
import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text as text
from pylab import rcParams
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelBinarizer
# Silence third-party warnings and TensorFlow's verbose INFO/WARNING logging
# so training output stays readable.
warnings.filterwarnings("ignore")
tf.get_logger().setLevel('ERROR')
class CaesarNLTrain:
    """Train a BERT-based intent classifier on pre-split intent dataframes.

    The classifier is a TF-Hub BERT encoder with a dropout + dense head,
    trained with categorical cross-entropy over one-hot intent labels.
    Artifacts written to disk:
      - caesarmodel/labelbinarizer.pkl  (fitted LabelBinarizer)
      - caesarmodel/caesarnl.h5         (trained Keras model)
      - caesartrainperformance/<history_filename>  (loss/accuracy plot)
    """

    @staticmethod
    def _plot_history(history, history_filename):
        """Plot training/validation loss and accuracy from a Keras History
        object and save the figure under caesartrainperformance/."""
        history_dict = history.history
        print(history_dict.keys())
        acc = history_dict['categorical_accuracy']
        val_acc = history_dict['val_categorical_accuracy']
        loss = history_dict['loss']
        val_loss = history_dict['val_loss']
        epochs = range(1, len(acc) + 1)

        fig = plt.figure(figsize=(10, 8))
        fig.tight_layout()

        plt.subplot(2, 1, 1)
        # red = training curve, blue = validation curve (both subplots)
        plt.plot(epochs, loss, 'r', label='Training loss')
        plt.plot(epochs, val_loss, 'b', label='Validation loss')
        plt.title('Training and validation loss')
        plt.grid(True)
        plt.ylabel('Loss')
        plt.legend()

        plt.subplot(2, 1, 2)
        plt.plot(epochs, acc, 'r', label='Training acc')
        plt.plot(epochs, val_acc, 'b', label='Validation acc')
        plt.title('Training and validation accuracy')
        plt.grid(True)
        plt.xlabel('Epochs')
        plt.ylabel('Accuracy')
        plt.legend(loc='lower right')

        plt.savefig(f"caesartrainperformance/{history_filename}")

    @staticmethod
    def train(traindf, validdf, testdf, examples, history_filename="history.png"):
        """Fine-tune a small BERT model to classify the "intent" column.

        Args:
            traindf, validdf, testdf: pandas DataFrames with an "intent"
                label column; the remaining column(s) are the text features.
            examples: list of example sentences classified after training
                for a qualitative spot-check.
            history_filename: file name (inside caesartrainperformance/)
                for the saved loss/accuracy plot.
        """
        intent_label_output_size = len(pd.unique(traindf["intent"]))

        trainfeatures = traindf.copy()
        trainlabels = trainfeatures.pop("intent")
        trainfeatures = trainfeatures.values

        # One-hot-encode the class labels.
        binarizer = LabelBinarizer()
        trainlabels = binarizer.fit_transform(trainlabels.values)

        # Preprocess test and validation data the same way as training data.
        testfeatures = testdf.copy()
        testlabels = testfeatures.pop("intent")
        validfeatures = validdf.copy()
        validlabels = validfeatures.pop("intent")
        testfeatures = testfeatures.values
        validfeatures = validfeatures.values
        testlabels = binarizer.transform(testlabels.values)
        validlabels = binarizer.transform(validlabels.values)

        # Persist the binarizer so inference can map logits back to intent
        # names. (Fix: use a context manager — the original leaked the handle.)
        with open('caesarmodel/labelbinarizer.pkl', 'wb') as pkl_file:
            pickle.dump(binarizer, pkl_file)

        # Resolve the TF-Hub handles for the chosen BERT variant.
        bert_model_name = 'small_bert/bert_en_uncased_L-8_H-512_A-8'
        with open("caesarberthubmodels/bert_to_handle.json") as f:
            map_name_to_handle = json.load(f)
        with open("caesarberthubmodels/bert_to_preprocess.json") as f:
            map_model_to_preprocess = json.load(f)
        tfhub_handle_encoder = map_name_to_handle[bert_model_name]
        tfhub_handle_preprocess = map_model_to_preprocess[bert_model_name]
        print(f'BERT model selected : {tfhub_handle_encoder}')
        print(f'Preprocess model auto-selected: {tfhub_handle_preprocess}')

        def build_classifier_model():
            """Assemble preprocessing -> BERT encoder -> dropout -> dense head."""
            text_input = tf.keras.layers.Input(shape=(), dtype=tf.string, name='text')
            preprocessing_layer = hub.KerasLayer(tfhub_handle_preprocess, name='preprocessing')
            encoder_inputs = preprocessing_layer(text_input)
            encoder = hub.KerasLayer(tfhub_handle_encoder, trainable=True, name='BERT_encoder')
            outputs = encoder(encoder_inputs)
            # pooled_output: one vector per input sentence.
            net = outputs['pooled_output']
            net = tf.keras.layers.Dropout(0.1)(net)
            # No activation: raw logits, paired with from_logits=True below.
            net = tf.keras.layers.Dense(intent_label_output_size, activation=None,
                                        name='classifier')(net)
            return tf.keras.Model(text_input, net)

        # (The original loaded the preprocess + encoder hub models a second
        # time just for a throwaway smoke test; that redundant load is removed.)
        classifier_model = build_classifier_model()

        # Sanity check on an untrained model — the output is meaningless,
        # it only verifies that the graph runs end-to-end.
        bert_raw_result = classifier_model(tf.constant(trainfeatures[0]))
        print(tf.keras.activations.softmax(bert_raw_result))
        classifier_model.summary()

        # Multi-class problem with logit outputs -> categorical cross-entropy.
        loss = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
        metrics = tf.metrics.CategoricalAccuracy()
        epochs = 5
        optimizer = tf.keras.optimizers.Adam(1e-5)
        classifier_model.compile(optimizer=optimizer,
                                 loss=loss,
                                 metrics=metrics)

        print(f'Training model with {tfhub_handle_encoder}')
        history = classifier_model.fit(x=trainfeatures, y=trainlabels,
                                       validation_data=(validfeatures, validlabels),
                                       batch_size=32,
                                       epochs=epochs)
        # NOTE(review): HDF5 may not fully serialize hub.KerasLayer models —
        # consider the TF SavedModel format instead; confirm before relying on it.
        classifier_model.save("caesarmodel/caesarnl.h5")

        # Evaluate on the held-out test split.
        loss, accuracy = classifier_model.evaluate(testfeatures, testlabels)
        print(f'Loss: {loss}')
        print(f'Accuracy: {accuracy}')

        CaesarNLTrain._plot_history(history, history_filename)

        def print_my_examples(inputs, results):
            """Print each example sentence next to its predicted intent."""
            result_for_printing = \
                [f'input: {inputs[i]:<30} : estimated intent: {results[i]}'
                 for i in range(len(inputs))]
            print(*result_for_printing, sep='\n')
            print()

        # Classify the caller-supplied examples and map probabilities back
        # to intent names via the fitted binarizer.
        results = tf.nn.softmax(classifier_model(tf.constant(examples)))
        intents = binarizer.inverse_transform(results.numpy())
        print_my_examples(examples, intents)
if __name__ == "__main__":
    # Example sentences for a qualitative spot-check after training.
    examples = [
        'play a song from U2',  # this is the same sentence tried earlier
        'Will it rain tomorrow',
        'I like to hear greatist hits from beastie boys',
        'I like to book a table for 3 persons',
        '5 stars for machines like me',
    ]

    datafolder = "intentdata/"
    # Load the pre-split train/validation/test CSVs into dataframes.
    frames = {
        split: pd.read_csv(datafolder + split + ".csv")
        for split in ("train", "valid", "test")
    }

    CaesarNLTrain.train(frames["train"], frames["valid"], frames["test"], examples)