Spaces:
Build error
Build error
PeteBleackley committed on
Commit ·
f16a715
1
Parent(s): eef3bd6
More work on models
Browse files
qarac/models/QaracDecoderModel.py
CHANGED
|
@@ -7,31 +7,111 @@ Created on Tue Sep 5 10:29:03 2023
|
|
| 7 |
"""
|
| 8 |
|
| 9 |
import keras
|
|
|
|
| 10 |
import transformers
|
| 11 |
|
| 12 |
class QaracDecoderHead(keras.layers.Layer):
    """Decoder head: two RoBERTa layers followed by a language-model head."""

    def __init__(self, config):
        """
        Creates the sub-layers of the decoder head

        Parameters
        ----------
        config : transformers.RobertaConfig
            Passed unchanged to every transformers sub-layer built here.

        Returns
        -------
        None.

        """
        super(QaracDecoderHead, self).__init__()
        self.concat = keras.layers.Concatenate(axis=1)
        self.layer_0 = transformers.TFRobertaLayer(config)
        # Fixed: this was spelled ``TFRobertalayer``, which is not the class
        # used for layer_0 above.
        self.layer_1 = transformers.TFRobertaLayer(config)
        # NOTE(review): confirm that TFRobertaLayer and TFRobertaLMHead are
        # reachable as top-level ``transformers`` attributes and that
        # TFRobertaLMHead can be built from ``config`` alone in the
        # installed release.
        self.head = transformers.TFRobertaLMHead(config)

    def call(self, inputs):
        """
        Runs the concatenated inputs through both layers and the LM head

        Parameters
        ----------
        inputs : sequence of tensors
            Tensors to be joined along axis 1.

        Returns
        -------
        Whatever ``self.head`` returns for the second layer's output.

        """
        vectors = self.concat(inputs)
        # NOTE(review): assumes the layer can be called with hidden states
        # only and returns an object exposing ``last_hidden_state`` -
        # confirm against the installed transformers release.
        l0 = self.layer_0(vectors)
        # Index 0 along axis 1 is dropped before the second layer.
        # Fixed: the attribute is ``layer_1``; ``self.layer1`` never existed.
        return self.head(self.layer_1(l0.last_hidden_state[:, 1:]))
|
| 25 |
|
| 26 |
-
class QaracDecoderModel(transformers.TFPretrainedModel):
|
| 27 |
|
| 28 |
-
def __init__(self,base_model):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
super(QaracDecoderModel,self).__init__()
|
| 30 |
self.base_model = base_model
|
| 31 |
self.decoder_head = QaracDecoderHead(self.base_model.config)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
|
| 33 |
-
def call(self,inputs):
|
| 34 |
-
|
| 35 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
|
| 37 |
|
|
|
|
| 7 |
"""
|
| 8 |
|
| 9 |
import keras
|
| 10 |
+
import tensorflow
|
| 11 |
import transformers
|
| 12 |
|
| 13 |
class QaracDecoderHead(keras.layers.Layer):
    """Decoder head: two RoBERTa layers followed by a language-model head."""

    def __init__(self, config):
        """
        Creates the Decoder head

        Parameters
        ----------
        config : transformers.RobertaConfig
            Config for the RobertaModel that this head will be attached to.

        Returns
        -------
        None.

        """
        super(QaracDecoderHead, self).__init__()
        self.concat = keras.layers.Concatenate(axis=1)
        self.layer_0 = transformers.TFRobertaLayer(config)
        # Fixed: this was spelled ``TFRobertalayer``, which is not the class
        # used for layer_0 above.
        self.layer_1 = transformers.TFRobertaLayer(config)
        # NOTE(review): confirm that TFRobertaLayer and TFRobertaLMHead are
        # reachable as top-level ``transformers`` attributes and that
        # TFRobertaLMHead can be built from ``config`` alone in the
        # installed release.
        self.head = transformers.TFRobertaLMHead(config)

    def build(self, input_shape):
        """
        Marks the layer as built; no weights are created here

        Parameters
        ----------
        input_shape : tuple
            Input shape (unused).

        Returns
        -------
        None.

        """
        self.built = True

    def call(self, inputs):
        """
        Predicts text from vector and hidden states of base model

        Parameters
        ----------
        inputs : tuple of tensorflow.Tensors
            Vector to be decoded and last hidden states of base model.
            They are concatenated along axis 1.

        Returns
        -------
        Whatever ``self.head`` returns for the second layer's output.

        """
        vectors = self.concat(inputs)
        # NOTE(review): assumes the layer can be called with hidden states
        # only and returns an object exposing ``last_hidden_state`` -
        # confirm against the installed transformers release.
        l0 = self.layer_0(vectors)
        # Index 0 along axis 1 is dropped before the second layer.
        # Fixed: the attribute is ``layer_1``; ``self.layer1`` never existed.
        return self.head(self.layer_1(l0.last_hidden_state[:, 1:]))
|
| 69 |
|
| 70 |
+
# Fixed: the base class is exported as ``TFPreTrainedModel`` (capital T in
# "Trained"); ``transformers.TFPretrainedModel`` does not exist.
class QaracDecoderModel(transformers.TFPreTrainedModel, transformers.TFGenerationMixin):
    """Wraps a base model and a QaracDecoderHead to turn a vector into text."""

    def __init__(self, base_model, tokenizer):
        """
        Creates decoder model from base model

        Parameters
        ----------
        base_model : transformers.TFRobertaModel
            The base model
        tokenizer : transformers.RobertaTokenizer
            Stored on the instance; not otherwise used in this class.

        Returns
        -------
        None.

        """
        config = base_model.config
        # Fixed: TFPreTrainedModel.__init__ needs the model config, so the
        # base model's config is forwarded rather than calling it bare.
        super(QaracDecoderModel, self).__init__(config)
        self.base_model = base_model
        self.decoder_head = QaracDecoderHead(config)
        self.tokenizer = tokenizer
        # Placeholders; nothing in this class assigns them a value yet.
        self.start = None
        self.end = None
        self.pad = None

    def call(self, inputs, **kwargs):
        """
        Predicts text from inputs

        Parameters
        ----------
        inputs : tuple of tensorflow.Tensors OR tensorflow.Tensor
            Either (vector, seed text), or the tokenized seed text alone
            when the vector is supplied through ``kwargs``.
        kwargs : optional keyword arguments
            vector : tensorflow.Tensor vector to be decoded. May be supplied
            via a keyword argument when this is invoked by .generate

        Returns
        -------
        Whatever ``self.decoder_head`` returns.

        """
        if 'vector' in kwargs:
            v, s = kwargs['vector'], inputs
        else:
            v, s = inputs
        hidden = self.base_model(s).last_hidden_state
        return self.decoder_head((v, hidden))
|
| 114 |
+
|
| 115 |
+
|
| 116 |
|
| 117 |
|
qarac/models/QaracEncoderModel.py
CHANGED
|
@@ -7,16 +7,59 @@ Created on Tue Sep 5 10:01:39 2023
|
|
| 7 |
"""
|
| 8 |
|
| 9 |
import transformers
|
| 10 |
-
import qarac.layers.GlobalAttentionPoolingHead
|
| 11 |
|
| 12 |
# Fixed: the base class is exported as ``TFPreTrainedModel`` (capital T in
# "Trained"); ``transformers.TFPretrainedModel`` does not exist.
class QaracEncoderModel(transformers.TFPreTrainedModel):
    """Base model followed by a GlobalAttentionPoolingHead."""

    def __init__(self, base_model):
        """
        Creates the encoder model

        Parameters
        ----------
        base_model : transformers.TFRobertaModel
            The base model

        Returns
        -------
        None.

        """
        # Fixed: TFPreTrainedModel.__init__ needs the model config, so the
        # base model's config is forwarded rather than calling it bare.
        super(QaracEncoderModel, self).__init__(base_model.config)
        self.base_model = base_model
        self.head = qarac.layers.GlobalAttentionPoolingHead.GlobalAttentionPoolingHead()

    def call(self, inputs):
        """
        Vectorizes a tokenized text

        Parameters
        ----------
        inputs : tensorflow.Tensor
            Tokenized text, passed straight to the base model.

        Returns
        -------
        Output of the pooling head applied to the base model's
        ``last_hidden_state``.

        """
        hidden = self.base_model(inputs).last_hidden_state
        return self.head(hidden)
|
| 21 |
|
| 22 |
|
|
|
|
| 7 |
"""
|
| 8 |
|
| 9 |
import transformers
|
| 10 |
+
import qarac.models.layers.GlobalAttentionPoolingHead
|
| 11 |
|
| 12 |
# Fixed: the base class is exported as ``TFPreTrainedModel`` (capital T in
# "Trained"); ``transformers.TFPretrainedModel`` does not exist.
class QaracEncoderModel(transformers.TFPreTrainedModel):
    """Base model followed by a GlobalAttentionPoolingHead."""

    def __init__(self, base_model):
        """
        Creates the encoder model

        Parameters
        ----------
        base_model : transformers.TFRobertaModel
            The base model

        Returns
        -------
        None.

        """
        # Fixed: TFPreTrainedModel.__init__ needs the model config, so the
        # base model's config is forwarded rather than calling it bare.
        super(QaracEncoderModel, self).__init__(base_model.config)
        self.base_model = base_model
        self.head = qarac.models.layers.GlobalAttentionPoolingHead.GlobalAttentionPoolingHead()

    def build(self, input_shape):
        """
        Marks the model as built; no weights are created here

        Parameters
        ----------
        input_shape : tuple
            shape of input data (unused).

        Returns
        -------
        None.

        """
        self.built = True

    def call(self, inputs):
        """
        Vectorizes a tokenised text

        Parameters
        ----------
        inputs : tensorflow.Tensor
            tokenized text to encode

        Returns
        -------
        tensorflow.Tensor
            Output of the pooling head applied to the base model's
            ``last_hidden_state``.

        """
        hidden = self.base_model(inputs).last_hidden_state
        return self.head(hidden)
|
| 64 |
|
| 65 |
|
qarac/models/QaracTrainerModel.py
CHANGED
|
@@ -12,14 +12,56 @@ import QaracDecoderModel
|
|
| 12 |
|
| 13 |
class QuaracTrainerModel(keras.Model):
|
| 14 |
|
| 15 |
-
def __init__(self, base_encoder_model, base_decoder_model):
    """
    Sets up the Trainer model

    Parameters
    ----------
    base_encoder_model : transformers.TFRobertaModel
        Base model for encoders. The same object is wrapped by both the
        question encoder and the answer encoder.
    base_decoder_model : transformers.TFRobertaModel
        Base model for decoder

    Returns
    -------
    None.

    """
    # Fixed: keras.Model must be initialised before sub-models or layers
    # are assigned as attributes.
    super(QuaracTrainerModel, self).__init__()
    self.question_encoder = QaracEncoderModel.QaracEncoderModel(base_encoder_model)
    self.answer_encoder = QaracEncoderModel.QaracEncoderModel(base_encoder_model)
    self.decoder = QaracDecoderModel.QaracDecoderModel(base_decoder_model)
    self.consistency = keras.layers.Dot(axes=1, normalize=True)
|
| 21 |
|
| 22 |
def call(self,inputs,training=None):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
results = {}
|
| 24 |
results['encode_decode'] = self.decoder((self.answer_encoder(inputs['all_text']),
|
| 25 |
inputs['offset_text']))
|
|
|
|
| 12 |
|
| 13 |
class QuaracTrainerModel(keras.Model):
|
| 14 |
|
| 15 |
+
def __init__(self, base_encoder_model, base_decoder_model, tokenizer):
    """
    Sets up the Trainer model

    Parameters
    ----------
    base_encoder_model : transformers.TFRobertaModel
        Base model for encoders. The same object is wrapped by both the
        question encoder and the answer encoder.
    base_decoder_model : transformers.TFRobertaModel
        Base model for decoder
    tokenizer : transformers.RobertaTokenizer
        Tokenizer for decoder

    Returns
    -------
    None.

    """
    # Fixed: keras.Model must be initialised before sub-models or layers
    # are assigned as attributes.
    super(QuaracTrainerModel, self).__init__()
    self.question_encoder = QaracEncoderModel.QaracEncoderModel(base_encoder_model)
    self.answer_encoder = QaracEncoderModel.QaracEncoderModel(base_encoder_model)
    self.decoder = QaracDecoderModel.QaracDecoderModel(base_decoder_model, tokenizer)
    self.consistency = keras.layers.Dot(axes=1, normalize=True)
|
| 36 |
|
| 37 |
def call(self,inputs,training=None):
|
| 38 |
+
"""
|
| 39 |
+
Generates training objective outputs from training data
|
| 40 |
+
|
| 41 |
+
Parameters
|
| 42 |
+
----------
|
| 43 |
+
inputs : dict[str,tensoflow.tensor]
|
| 44 |
+
Fields are
|
| 45 |
+
'all_text': Tokenized text to train answer encoder to produce vectors
|
| 46 |
+
and decoder to convert them back to text
|
| 47 |
+
'offset_text': Same text as in 'all_text', but preceded by <s>
|
| 48 |
+
'question': Tokenized text of questions for question answering
|
| 49 |
+
objective
|
| 50 |
+
'answer': Tokenized text of answers for question answering objective
|
| 51 |
+
'proposition0': tokenized proposition for reasoning objective
|
| 52 |
+
'proposition1': tokenized proposition for reasoning objective
|
| 53 |
+
'conclusion_offset': tokenized text of conclusions for reasoning
|
| 54 |
+
objective, prefixed by '<s>'
|
| 55 |
+
'statement0': tokenized statement for consistency objective
|
| 56 |
+
training : Bool, optional
|
| 57 |
+
Not used. The default is None.
|
| 58 |
+
|
| 59 |
+
Returns
|
| 60 |
+
-------
|
| 61 |
+
results : TYPE
|
| 62 |
+
DESCRIPTION.
|
| 63 |
+
|
| 64 |
+
"""
|
| 65 |
results = {}
|
| 66 |
results['encode_decode'] = self.decoder((self.answer_encoder(inputs['all_text']),
|
| 67 |
inputs['offset_text']))
|
qarac/models/layers/GlobalAttentionPoolingHead.py
CHANGED
|
@@ -12,37 +12,66 @@ import tensorflow
|
|
| 12 |
class GlobalAttentionPoolingHead(keras.layers.Layer):
|
| 13 |
|
| 14 |
def __init__(self):
    """
    Creates the layer with both projection weights unset

    Returns
    -------
    None.

    """
    super().__init__()
    # Both start as None; they are assigned in build().
    self.local_projection = None
    self.global_projection = None
|
| 18 |
|
| 19 |
|
| 20 |
def build(self,input_shape):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
width = input_shape[-1]
|
| 22 |
self.global_projection = self.add_weight('global projection',shape=(width,width))
|
| 23 |
self.local_projection = self.add_weight('local projection',shape=(width,width))
|
| 24 |
-
self.
|
| 25 |
|
| 26 |
-
@tensorflow.function
def project(self, X):
    """
    Applies the local projection to X

    Parameters
    ----------
    X : tensorflow.Tensor
        Tensor whose last axis is contracted with the first axis of
        ``self.local_projection``.

    Returns
    -------
    tensorflow.Tensor
        The projected tensor.

    """
    projected = tensorflow.tensordot(X, self.local_projection, axes=1)
    return projected
|
| 29 |
|
| 30 |
-
def attention_function(self, gp):
    """
    Builds a function that contracts its argument with a fixed tensor

    Parameters
    ----------
    gp : tensorflow.Tensor
        Tensor captured by the returned function.

    Returns
    -------
    callable
        A ``tensorflow.function`` taking ``lp`` and returning
        ``tensordot(lp, gp, axes=1)``.

    """
    @tensorflow.function
    def inner(lp):
        product = tensorflow.tensordot(lp, gp, axes=1)
        return product

    return inner
|
| 35 |
|
| 36 |
def call(self,X,training=None):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
gp = tensorflow.linalg.l2_normalize(tensorflow.tensordot([tensorflow.reduce_sum(X,
|
| 38 |
axis=1),
|
| 39 |
self.global_projection],
|
| 40 |
axes=1),
|
| 41 |
axis=1)
|
| 42 |
-
lp = tensorflow.linalg.l2_normalize(tensorflow.
|
| 43 |
-
|
|
|
|
| 44 |
axis=2)
|
| 45 |
-
attention = tensorflow.
|
| 46 |
-
lp)
|
| 47 |
return tensorflow.reduce_sum(attention *X,
|
| 48 |
axis=1)
|
|
|
|
| 12 |
class GlobalAttentionPoolingHead(keras.layers.Layer):
    """
    Pools a rank-3 tensor over axis 1, weighting each position by the dot
    product of its normalised local projection with a normalised global
    projection of the axis-1 sum.
    """

    def __init__(self):
        """
        Creates the layer

        Returns
        -------
        None.

        """
        super(GlobalAttentionPoolingHead, self).__init__()
        # Both are assigned in build().
        self.global_projection = None
        self.local_projection = None

    def build(self, input_shape):
        """
        Initialises layer weights

        Parameters
        ----------
        input_shape : tuple
            Shape of the input layer; only its last entry is used.

        Returns
        -------
        None.

        """
        width = input_shape[-1]
        # The name is passed by keyword so the call does not depend on the
        # positional order of add_weight's parameters.
        self.global_projection = self.add_weight(name='global projection',
                                                 shape=(width, width))
        self.local_projection = self.add_weight(name='local projection',
                                                shape=(width, width))
        self.built = True

    def call(self, X, training=None):
        """
        Applies attention-weighted pooling over axis 1

        Parameters
        ----------
        X : tensorflow.Tensor
            Base model vectors to apply pooling to. Must be rank 3; the
            last axis has the width seen in build().
            NOTE(review): presumably (batch, sequence, width) - confirm.
        training : bool, optional
            Not used. The default is None.

        Returns
        -------
        tensorflow.Tensor
            The pooled value, with axis 1 summed out.

        """
        # Fixed: tensordot takes its two operands separately; they were
        # previously wrapped together in a single list.
        summed = tensorflow.reduce_sum(X, axis=1)
        gp = tensorflow.linalg.l2_normalize(
            tensorflow.tensordot(summed, self.global_projection, axes=1),
            axis=1)
        lp = tensorflow.linalg.l2_normalize(
            tensorflow.tensordot(X, self.local_projection, axes=1),
            axis=2)
        # Fixed: tensordot(lp, gp, axes=1) contracted the last axis of lp
        # with the FIRST axis of gp. The intended product pairs each
        # example's positions with that example's own global vector.
        attention = tensorflow.einsum('bsw,bw->bs', lp, gp)
        # A trailing axis lets the per-position weights broadcast over X.
        weighted = tensorflow.expand_dims(attention, axis=-1) * X
        return tensorflow.reduce_sum(weighted, axis=1)
|
scripts.py
CHANGED
|
@@ -9,7 +9,10 @@ import qarac.models.qarac_base_model
|
|
| 9 |
import keras
|
| 10 |
import tensorflow
|
| 11 |
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
| 13 |
|
| 14 |
|
| 15 |
|
|
|
|
| 9 |
import keras
|
| 10 |
import tensorflow
|
| 11 |
|
| 12 |
+
def decoder_loss(y_true, y_pred):
    """
    Sparse categorical cross-entropy between target ids and decoder logits

    Parameters
    ----------
    y_true : tensor
        Target class indices.
    y_pred : object with a ``logits`` attribute
        Decoder output; only ``y_pred.logits`` is read.

    Returns
    -------
    tensor
        The loss returned by keras.losses.sparse_categorical_crossentropy.

    """
    # Fixed: the keyword is ``from_logits``; ``logits=True`` is not an
    # accepted argument of sparse_categorical_crossentropy.
    return keras.losses.sparse_categorical_crossentropy(y_true,
                                                        y_pred.logits,
                                                        from_logits=True)
|
| 16 |
|
| 17 |
|
| 18 |
|