krotima1 committed on
Commit
8cf2e56
·
1 Parent(s): 6885f5c

feat: add final AlignScoreCS.py file for ease of use with transformers.

Browse files
Files changed (1) hide show
  1. AlignScoreCS.py +220 -395
AlignScoreCS.py CHANGED
@@ -6,32 +6,37 @@ import numpy as np
6
  from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple, Union
7
  import torch.nn as nn
8
  import torch
9
- # This include should be add when using different AlignScoreFunction methods instead of score()
10
- # from nltk.tokenize import sent_tokenize
11
  from tqdm import tqdm
12
 
13
  class AlignScoreCS(transformers.XLMRobertaModel):
14
  """
15
- ALIGNSCORE class
16
-
17
  Description:
18
  Model ALIGNSCORECS has been trained according the paper for 3 days on 4GPUs AMD NVIDIA.
19
  (3 epochs, 1e-5 learning rate, 1e-6 AdamWeps, batchsize 32, WarmupRatio 0.06, 0.1 WeighDecay)
20
- - XLMROBERTA-base model with 3 classification HEAD {regression,binary,3way} using shared encoder
 
 
 
 
21
 
22
  USAGE: AlignScore.py
23
- - from_pretrained - loads the model, usage as transformers.model
 
24
  - .score(context, claim) - function
25
  - returns probs of the ALIGNED class using 3way class head as in the paper.
26
-
 
 
 
27
  alignScoreCS = AlignScoreCS.from_pretrained("/mnt/data/factcheck/AlignScore-data/AAmodel/MTLModel/mo
28
  alignScoreCS.score(context,claim)
29
-
30
  If you want to try different classification head use parameter:
31
  - task_name = "re" : regression head
32
  - task_name = "bin" : binary classification head
33
  - task_name = "3way" : 3way classification head
34
-
35
  """
36
  _regression_model = "re_model"
37
  _binary_class_model = "bin_model"
@@ -41,324 +46,74 @@ class AlignScoreCS(transformers.XLMRobertaModel):
41
  super().__init__(transformers.XLMRobertaConfig(), **kwargs)
42
  self.encoder = encoder
43
  self.taskmodels_dict = nn.ModuleDict(taskmodels_dict)
44
- self.tokenizer = None
45
  self.model_name = model_name
 
46
  self.inferencer = None
 
 
47
 
48
def init_inferencer(self, device="cuda"):
    """Lazily create the tokenizer (if absent) and the paper-style inference handler.

    Parameters
    ----------
    device : str
        Torch device the InferenceHandler moves the model to (default "cuda").
    """
    if not self.tokenizer:
        self.tokenizer = transformers.AutoTokenizer.from_pretrained(self.model_name)
    self.inferencer = self.InferenceHandler(self, self.tokenizer, device)
51
 
52
-
53
-
54
  """
55
- Score: scores the context and claim with Aligned probabitlity of 3way classification head
56
- - using paper code inferencer from ALignScore
 
 
 
 
 
 
 
 
 
 
 
 
 
57
 
58
  """
59
def score(self, context, claim, **kwargs):
    """Score claim(s) against context(s) using the AlignScore-paper inference path.

    Lazily builds the inference handler on first use, then delegates to its
    ``nlg_eval`` (3-way head, ALIGNED-class probability) and returns whatever
    it produces (a torch tensor of per-pair scores).
    """
    if self.inferencer is None:
        self.init_inferencer()
    return self.inferencer.nlg_eval(context, claim)
 
64
 
65
  """
66
- Score: scores the context and claim with ALIGNED probability (wrt task_name ["re" | "bin" | "3way"])
67
-
68
- Returns the probability of the ALIGNED CLASS between context text and claim text
69
- - chunks text by 350 tokens and splits claim into sentences
70
- - using 3way classification head
71
- """
72
def score_sentences(self, context: str, claim: str, task_name="3way", batch_size=2, return_all_outputs=False, **kwargs):
    """Score claim sentence-chunks against 350-token context chunks.

    The context is windowed into ~350-token chunks and the claim is split
    into sentence chunks; every (claim chunk, context chunk) pair is run
    through the chosen head in mini-batches and aggregated by
    ``alignscore_input`` (max over context chunks, mean over claim chunks).

    Parameters
    ----------
    task_name : str
        "re", "bin" or "3way" classification head.
    batch_size : int
        Mini-batch size for the forward passes.
    return_all_outputs : bool
        When True, return a dict with "score" and the raw per-batch "outputs";
        otherwise return a 1-element tensor with the aggregated score.
    """
    if not self.tokenizer:
        self.tokenizer = transformers.AutoTokenizer.from_pretrained(self.model_name)
    raw = self.chunk_sent_input(context, claim, chunk_size=350, chunk_claim_size=150)
    n_claims, n_contexts = raw["n_claims"], raw["n_contexts"]
    with torch.no_grad():
        batched = {k: torch.tensor(v).to(self.device) for k, v in raw.items() if not k.startswith("n_")}
        collected = {}
        total = len(batched["input_ids"])
        for start in range(0, total, batch_size):
            window = slice(start, start + batch_size)
            step_out = self.forward(task_name=task_name,
                                    input_ids=batched["input_ids"][window],
                                    attention_mask=batched["attention_mask"][window],
                                    **kwargs)
            for key, value in step_out.items():
                collected.setdefault(key, []).append(value)
        logits = torch.vstack(collected["logits"]).cpu()
        score_value = self.alignscore_input(logits, nclaims=n_claims, ncontexts=n_contexts, task_name=task_name)
        if return_all_outputs:
            return {"score": score_value, "outputs": collected}
        return torch.tensor([score_value])
87
-
88
-
89
- """
90
- Score: scores the context and claim with ALIGNED probability (wrt task_name ["re" | "bin" | "3way"])
91
-
92
- Returns the probability of the ALIGNED CLASS between context text and claim text
93
- - chunks text into 350 tolens and chunks claim into 150 tokens
94
- - using 3way classification head
95
- """
96
def score_chunks(self, context: str, claim: str, task_name="3way", batch_size=2, return_all_outputs=False, **kwargs):
    """Score a claim against sentence-packed context chunks in one forward pass.

    ``chunk_inputs`` packs context sentences into chunks (350-token budget,
    claim appended to each); all chunks are scored at once and aggregated with
    the deprecated max-over-chunks aggregation.

    NOTE(review): unlike score_sentences/score_truncated this runs without
    torch.no_grad() - confirm whether gradient tracking here is intended.
    ``batch_size`` is currently unused (kept for signature compatibility).
    """
    if not self.tokenizer:
        self.tokenizer = transformers.AutoTokenizer.from_pretrained(self.model_name)
    model_inputs = self.chunk_inputs(context, claim, chunk_size=350)
    model_inputs = {name: torch.tensor(vals).to(self.device) for name, vals in model_inputs.items()}
    head_outputs = self.forward(task_name=task_name, **model_inputs, **kwargs)
    result = {
        "score": self.alignscore_input_deprecated(head_outputs.logits.cpu(), task_name=task_name),
        "outputs": head_outputs,
    }
    return result if return_all_outputs else result["score"]
104
-
105
- """
106
- Classify: classify the context and claim to the class label given the task_name ["re" | "bin" | "3way"]
107
-
108
- Returns the class of {Neutral, contradict, aligned} between context text and claim text
109
- - using 3way classification head
110
  """
111
def classify(self, context: str, claim: str, task_name="3way", return_all_outputs=False, **kwargs):
    """Classify the context/claim pair into a single label for the chosen head.

    Context is chunked (``chunk_inputs``, 350-token budget), all chunks are run
    through the head in one forward pass, and ``get_system_label`` reduces the
    per-chunk predictions to one label (majority vote with an
    average-probability tie-break).

    Returns the label, or - when ``return_all_outputs`` is True - a dict with
    "class" and the raw model "outputs".
    """
    if not self.tokenizer:
        self.tokenizer = transformers.AutoTokenizer.from_pretrained(self.model_name)
    model_inputs = self.chunk_inputs(context, claim, chunk_size=350)
    model_inputs = {name: torch.tensor(vals).to(self.device) for name, vals in model_inputs.items()}
    head_outputs = self.forward(task_name=task_name, **model_inputs, **kwargs)
    result = {
        "class": self.get_system_label(head_outputs.logits.cpu(), task_name=task_name),
        "outputs": head_outputs,
    }
    return result if return_all_outputs else result["class"]
119
-
120
-
121
def score_truncated(self, context: str, claim: str, task_name="3way", return_all_outputs=False, **kwargs):
    """Score a single context/claim pair hard-truncated to 512 tokens.

    Tokenizes the pair directly (padding/truncation to max_length=512), runs
    one forward pass under no_grad, and aggregates via ``alignscore_input``
    with a 1x1 chunk grid.
    """
    if not self.tokenizer:
        self.tokenizer = transformers.AutoTokenizer.from_pretrained(self.model_name)
    encoded = self.tokenizer(list(zip([context], [claim])), padding="max_length", truncation=True, max_length=512, return_tensors="pt")
    encoded = {name: torch.tensor(vals).to(self.device) for name, vals in encoded.items()}
    with torch.no_grad():
        head_outputs = self.forward(task_name=task_name, **encoded, **kwargs)
    score_value = self.alignscore_input(head_outputs["logits"].cpu(), nclaims=1, ncontexts=1, task_name=task_name)
    if return_all_outputs:
        return {"score": score_value, "outputs": head_outputs}
    return torch.tensor([score_value])
130
 
131
def forward(self, task_name="3way", **kwargs):
    """Dispatch the forward pass to the task-specific head in taskmodels_dict."""
    head = self.taskmodels_dict[task_name]
    return head(**kwargs)
133
 
134
def __call__(self, task_name, **kwargs):
    """Calling the model directly routes to the selected task head.

    Note ``task_name`` is required here (no default), unlike ``forward``.
    """
    head = self.taskmodels_dict[task_name]
    return head(**kwargs)
136
-
137
- """
138
- Get the probability of the ALIGNED label from input
139
- """
140
def alignscore_input(self, chunked_logits, nclaims, ncontexts, task_name="3way"):
    """Aggregate per-chunk logits into one ALIGNED-probability score.

    The rows of ``chunked_logits`` correspond to the flattened
    (claim chunk x context chunk) grid built by ``chunk_sent_input``
    (claim-major order, nclaims * ncontexts rows). Aggregation is
    SummaC-style: for every claim chunk take the maximum over its context
    chunks, then average over claim chunks.

    Parameters
    ----------
    chunked_logits : torch.Tensor
        Logits of shape (nclaims * ncontexts, n_labels), or
        (nclaims * ncontexts, 1) for the regression head.
    nclaims, ncontexts : int
        Grid dimensions used to un-flatten the rows.
    task_name : str
        "re" (regression), "bin"/"2way" (binary) or "3way" head.

    Returns
    -------
    float : mean over claim chunks of the per-claim maxima.
    """
    if task_name == "re":
        # Regression head emits the score directly - no softmax.
        grid = chunked_logits.detach().view(nclaims, ncontexts)
        best_per_claim, _ = grid.max(dim=1)
        return best_per_claim.mean(dim=0).tolist()
    # Bug fix: "bin" was missing from this label-count map, so the binary
    # head advertised by score_sentences()/classify() raised KeyError here.
    nlabels = {"3way": 3, "re": 1, "2way": 2, "bin": 2}[task_name]
    probs = chunked_logits.softmax(1).detach().view(nclaims, ncontexts, nlabels)
    # Index 1 is the ALIGNED class for both the binary and 3-way heads.
    best_per_claim, _ = probs[:, :, 1].max(dim=1)
    return best_per_claim.mean(dim=0).tolist()
164
-
165
-
166
def alignscore_input_deprecated(self, chunked_logits, task_name="3way"):
    """Deprecated aggregation: maximum over ALL chunks (no claim/context grid).

    Regression head: max of the raw per-chunk outputs. Classification heads:
    max ALIGNED-class (index 1) probability over all chunks.
    """
    detached = chunked_logits.detach()
    if task_name == "re":
        return detached.amax(0).tolist()
    aligned_probs = detached.softmax(1)[:, 1]
    return aligned_probs.amax(0).tolist()
171
-
172
-
173
- """
174
- get the label from the input
175
- """
176
def get_system_label(self, chunked_logits, task_name):
    """Reduce per-chunk logits to a single predicted label.

    Regression head: mean of the raw outputs over all chunks.
    Classification heads: majority vote over the per-chunk argmax
    predictions; a tie falls back to the argmax of the chunk-averaged
    probabilities.
    """
    n_chunks = chunked_logits.size()[0]
    if task_name == "re":
        return (chunked_logits.sum(0) / n_chunks).detach().tolist()
    probs = chunked_logits.softmax(1)
    avg_probs = probs.sum(0) / n_chunks
    votes = probs.argmax(1).detach().numpy()
    labels, freqs = np.unique(votes, return_counts=True)
    winners = labels[freqs == np.max(freqs)]
    # Single majority winner -> return it; otherwise tie-break on averages.
    return winners[0] if winners.size == 1 else avg_probs.detach().argmax().tolist()
189
-
190
- """
191
- Chunks input context and claim - context is chunked into 350 tokens
192
- - claim is chunked into sentences
193
- - using stride for overflowing tokens
194
- """
195
def chunk_sent_input(self, context, claim, max_length=512, chunk_size=350, chunk_claim_size=150):
    """Build model inputs for every (claim chunk, context chunk) pair.

    The context is windowed into ``chunk_size``-token chunks (``chunk_text``)
    and the claim into sentence chunks (``chunk_sentences``); each pair is
    concatenated and padded to ``max_length``. Rows are emitted claim-major,
    matching the un-flattening done in ``alignscore_input``.

    Returns
    -------
    dict with "input_ids" and "attention_mask" (lists of token-id / mask
    lists) plus the grid sizes "n_claims" and "n_contexts".
    """
    # Bug fix: the first assert's message formatted chunk_claim_size where
    # the max size was meant to be printed (2 placeholders, 3 format args).
    assert chunk_size <= max_length, "Chunk size {} cannot be greater than max size {}".format(chunk_size, max_length)
    if chunk_claim_size is None:
        chunk_claim_size = max_length - chunk_size
    assert chunk_size + chunk_claim_size <= max_length, "Chunk size {} and Chunk claim size {} cannot be together greater than max size {}".format(chunk_size, chunk_claim_size, max_length)

    context_chunks = self.chunk_text(context, chunk_size=chunk_size, overflowing_tokens_stride=25, first_special_token=[0])
    claim_chunks = self.chunk_sentences(claim, chunk_size=chunk_claim_size, overflowing_tokens_stride=int(chunk_claim_size / 3), first_special_token=[2])

    batch = {"input_ids": [], "attention_mask": []}
    for claim_chunk in claim_chunks:
        for context_chunk in context_chunks:
            ids, mask = self.fill_with_pad_tokens(context_chunk, claim_chunk)
            batch["input_ids"].append(ids)
            batch["attention_mask"].append(mask)
    batch["n_claims"] = len(claim_chunks)
    batch["n_contexts"] = len(context_chunks)
    return batch
210
-
211
- """
212
- According to paper - chunk the text into smaller parts (350tokens + claim_tokens) when the tokenized inputs exceed the max_length
213
- returns chunked input
214
- """
215
def chunk_inputs(self, context, claim, max_length=512, chunk_size=512, first_fit_within_max_length=True):
    """Pack context sentences into chunks that fit ``chunk_size`` with the claim.

    Per the AlignScore paper: if claim+context already fit, a single padded
    input is returned; otherwise the context is split on sentence boundaries
    (Czech sent_tokenize) and greedily packed so each chunk plus the claim
    stays within the budget, flushing via ``_update_chunked_inputs``.
    """
    assert chunk_size <= max_length, "Chunk size {} cannot be greater than max size {}".format(chunk_size, max_length)

    claim_tok = self.tokenizer(claim, return_length=True)
    # Replace the leading <s> with </s>: pair inputs separate context and
    # claim with a </s></s> boundary.
    claim_tok["input_ids"][0] = 2
    context_tok = self.tokenizer(context, return_length=True)
    claim_len = claim_tok["length"][0]
    assert claim_len < max_length * 4 / 5, "Create chunks of claim sentences. Claim is too long {} which is more than 4/5 from {}.".format(claim_len, max_length)

    # Grow the chunk budget so it also covers the claim tokens.
    chunk_size = min(max_length, chunk_size + claim_len)
    fit_limit = max_length if first_fit_within_max_length else chunk_size

    if claim_len + context_tok["length"][0] <= fit_limit:
        # Everything fits: one padded row, no chunking needed.
        ids, mask = self.fill_with_pad_tokens(context_tok["input_ids"], claim_tok["input_ids"])
        return {"input_ids": [ids], "attention_mask": [mask]}

    chunked = {}
    pending = {}
    for sentence in sent_tokenize(context, language="czech"):
        sent_tok = self.tokenizer(sentence, return_length=True)
        candidate_len = len(pending.get("input_ids", [0])) + sent_tok["length"][0] - 1 + claim_len
        if candidate_len <= chunk_size:
            # Sentence still fits: append its body tokens (drop <s>/</s>).
            pending["input_ids"] = pending.get("input_ids", [0]) + sent_tok["input_ids"][1:-1]
        else:
            # Flush the current chunk and start a new one with this sentence.
            chunked = self._update_chunked_inputs(claim_tok, pending, chunked, max_length, sent_tok)
            pending["input_ids"] = [0] + sent_tok["input_ids"][1:-1]
    if pending != {}:
        # Flush the remainder.
        chunked = self._update_chunked_inputs(claim_tok, pending, chunked, max_length)
    return chunked
245
-
246
- """
247
- Chunks input context and claim - context is chunked into 350 tokens
248
- - claim is chunked into 150 tokens
249
- - using stride for overflowing tokens
250
- """
251
def chunk_input_deprecated(self, context, claim, max_length=512, chunk_size=350, chunk_claim_size=150):
    """Deprecated: chunk both context and claim by fixed token windows.

    Like ``chunk_sent_input``, but the claim is chunked with ``chunk_text``
    (fixed token windows) instead of sentence splitting. Returns
    "input_ids"/"attention_mask" rows for every (claim chunk, context chunk)
    pair plus the grid sizes "n_claims"/"n_contexts".
    """
    # Bug fix: the first assert's message formatted chunk_claim_size where
    # the max size was meant to be printed (2 placeholders, 3 format args).
    assert chunk_size <= max_length, "Chunk size {} cannot be greater than max size {}".format(chunk_size, max_length)
    if chunk_claim_size is None:
        chunk_claim_size = max_length - chunk_size
    assert chunk_size + chunk_claim_size <= max_length, "Chunk size {} and Chunk claim size {} cannot be together greater than max size {}".format(chunk_size, chunk_claim_size, max_length)

    context_chunks = self.chunk_text(context, chunk_size=chunk_size, overflowing_tokens_stride=25, first_special_token=[0])
    claim_chunks = self.chunk_text(claim, chunk_size=chunk_claim_size, overflowing_tokens_stride=int(chunk_claim_size / 3), first_special_token=[2])

    batch = {"input_ids": [], "attention_mask": []}
    for claim_chunk in claim_chunks:
        for context_chunk in context_chunks:
            ids, mask = self.fill_with_pad_tokens(context_chunk, claim_chunk)
            batch["input_ids"].append(ids)
            batch["attention_mask"].append(mask)
    batch["n_claims"] = len(claim_chunks)
    batch["n_contexts"] = len(context_chunks)
    return batch
266
 
267
-
268
- """
269
- Chunk texts into blocks of chunk_size tokens
270
-
271
- """
272
def chunk_text(self, text, chunk_size=350, overflowing_tokens_stride=25, language="czech", first_special_token=[0]):
    """Greedily pack sentence token-ids into chunks of at most chunk_size tokens.

    Sentences come from nltk's sent_tokenize; each returned chunk starts with
    ``first_special_token`` and is closed with </s> (id 2). A single sentence
    longer than chunk_size is sliced into overlapping windows with
    ``overflowing_tokens_stride`` tokens of overlap.
    (``first_special_token``'s mutable default is never mutated here.)
    """
    sentences = sent_tokenize(text, language=language)
    tokenized = self.tokenizer(sentences if sentences != [] else [""], return_length=True)
    chunks = []
    current, current_size = [], 0
    for idx, sent_len in enumerate(tokenized["length"]):
        ids = tokenized["input_ids"][idx]
        if sent_len > chunk_size:
            # Oversized sentence: slice into overlapping windows.
            step = chunk_size - (2 + overflowing_tokens_stride)
            pieces = [first_special_token + ids[max(1, start):min(start + chunk_size - 2, sent_len - 1)] + [2]
                      for start in range(0, sent_len, step)]
        else:
            # Sentence fits as a single piece (keep its trailing </s>).
            pieces = [first_special_token + ids[1:]]
        for piece in pieces:
            body_len = len(piece) - 2
            if current_size == 0:
                # Start a fresh chunk (size counts <s> and </s>).
                current_size = body_len + 2
                current = piece[:-1]
            elif current_size + body_len <= chunk_size:
                # Piece fits: append its body tokens without specials.
                current_size += body_len
                current += piece[1:-1]
            else:
                # Chunk full: close it and start over with this piece.
                chunks += [current + [2]]
                current_size = body_len + 2
                current = piece[:-1]
    # Flush the last open chunk.
    if current != []:
        chunks += [current + [2]]
    return chunks
310
-
311
- """
312
- Chunks text into sentences using nlt.sent_tokenize
313
- """
314
def chunk_sentences(self, text, chunk_size, overflowing_tokens_stride=0, language="czech", sentence_window=2, first_special_token=[2]):
    """Split text into per-sentence token chunks.

    Each sentence becomes its own chunk prefixed with ``first_special_token``;
    sentences longer than chunk_size are sliced into overlapping windows as in
    ``chunk_text``. ``sentence_window`` is currently unused (a sliding-window
    grouping existed here but was disabled).
    """
    sentences = sent_tokenize(text, language=language)
    tokenized = self.tokenizer(sentences if sentences != [] else [""], return_length=True)
    chunks = []
    for idx, sent_len in enumerate(tokenized["length"]):
        ids = tokenized["input_ids"][idx]
        if sent_len > chunk_size:
            # Oversized sentence: slice into overlapping windows.
            step = chunk_size - (2 + overflowing_tokens_stride)
            pieces = [first_special_token + ids[max(1, start):min(start + chunk_size - 2, sent_len - 1)] + [2]
                      for start in range(0, sent_len, step)]
        else:
            pieces = [first_special_token + ids[1:]]
        chunks.extend(pieces)
    return chunks
340
-
341
- """
342
- join context and claim tokens as input_ids and create attention_mask
343
- """
344
def fill_with_pad_tokens(self, first, second, max_length=512, pad_token=1):
    """Concatenate two token-id lists, pad to max_length, and build the mask.

    Returns (input_ids, attention_mask). No truncation is performed: inputs
    longer than max_length come back unpadded at their full length.
    """
    used = len(first) + len(second)
    pad_count = max(max_length - used, 0)
    input_ids = first + second + [pad_token] * pad_count
    attention_mask = [1] * used + [0] * pad_count
    return input_ids, attention_mask
346
-
347
-
348
def _update_chunked_inputs(self, tokenized_claim, current_chunk, return_chunked_inputs, max_length, tok_sent={"input_ids": []}):
    """Close ``current_chunk``, pair it with the claim, and append to the batch.

    Truncates the chunk when chunk+claim would exceed max_length (rare
    long-sentence case); falls back to the raw sentence tokens when the chunk
    is empty. NOTE: tok_sent's mutable default is never mutated here.
    """
    claim_len = tokenized_claim["length"][0]
    budget = max_length - claim_len - 1
    if len(current_chunk.get("input_ids", [0])) + claim_len >= max_length:
        # Chunk + claim would overflow: truncate the chunk to fit.
        closed = current_chunk["input_ids"].copy()[:budget] + [2]
    elif not current_chunk.get("input_ids", False):
        # Empty chunk: use the (truncated) sentence tokens directly.
        closed = tok_sent["input_ids"][:budget] + [2]
    else:
        closed = current_chunk["input_ids"].copy() + [2]  # add </s>
    ids, mask = self.fill_with_pad_tokens(closed, tokenized_claim["input_ids"].copy())
    return_chunked_inputs.setdefault("input_ids", []).append(ids)
    return_chunked_inputs.setdefault("attention_mask", []).append(mask)
    return return_chunked_inputs
361
-
362
  @classmethod
363
  def get_encoder_attr_name(cls, model):
364
  """
@@ -370,13 +125,19 @@ class AlignScoreCS(transformers.XLMRobertaModel):
370
  return "roberta"
371
  else:
372
  raise KeyError(f"Add support for new model {model_class_name}")
373
-
374
-
 
 
 
 
 
375
  @classmethod
376
  def from_pretrained(
377
  cls,
378
  pretrained_model_name_or_path: Optional[Union[str, os.PathLike]],
379
  model_name : str = "xlm-roberta-large",
 
380
  *model_args,
381
  config: Optional[Union[PretrainedConfig, str, os.PathLike]] = None,
382
  cache_dir: Optional[Union[str, os.PathLike]] = None,
@@ -388,19 +149,19 @@ class AlignScoreCS(transformers.XLMRobertaModel):
388
  use_safetensors: bool = None,
389
  **kwargs,
390
  ):
391
- # Check if the required model directories exist then load it from file
392
- if all(os.path.exists(os.path.join(pretrained_model_name_or_path, model_dir)) for model_dir in [cls._3way_class_model, cls._regression_model, cls._binary_class_model]):
393
- # assert all(
394
-
395
- # for model_dir in [cls._3way_class_model, cls._regression_model, cls._binary_class_model]
396
- # ), "Error: Required model directories not found!"
397
-
398
- # Disable the warning about newly initialized weights
399
  transformers.logging.set_verbosity_error()
400
 
401
  shared_encoder = None
402
  taskmodels_dict = {}
403
- for path_name in [cls._regression_model, cls._binary_class_model, cls._3way_class_model]:
404
  task_name = path_name.split("_")[0]
405
 
406
  # Load the configuration for the task-specific model
@@ -417,25 +178,56 @@ class AlignScoreCS(transformers.XLMRobertaModel):
417
  taskmodels_dict[task_name] = model
418
 
419
  # Create the AlignScoreCS with the shared encoder and loaded task-specific models
420
- alignScoreCS = AlignScoreCS(encoder=shared_encoder, taskmodels_dict=taskmodels_dict, model_name=model_name)
421
- #Try load the model from huggingface hub
422
- else:
423
  shared_encoder = None
424
  taskmodels_dict = {}
425
- for model_dir in [cls._regression_model, cls._binary_class_model, cls._3way_class_model]:
426
- task_name = model_dir.split("_")[0]
427
- config = transformers.XLMRobertaConfig.from_pretrained(f"{pretrained_model_name_or_path}", subfolder=model_dir)
428
- model = transformers.XLMRobertaForSequenceClassification.from_pretrained(f"{pretrained_model_name_or_path}",config=config, subfolder=model_dir)
 
429
  if shared_encoder is None:
430
  shared_encoder = getattr(model, AlignScoreCS.get_encoder_attr_name(model))
431
  else:
432
  setattr(model, AlignScoreCS.get_encoder_attr_name(model), shared_encoder)
433
  taskmodels_dict[task_name] = model
434
- alignScoreCS = AlignScoreCS(encoder=shared_encoder, taskmodels_dict=taskmodels_dict, model_name=model_name)
435
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
436
  return alignScoreCS
437
 
438
-
 
 
439
  def save_pretrained(
440
  self,
441
  save_directory: Union[str, os.PathLike],
@@ -450,8 +242,11 @@ class AlignScoreCS(transformers.XLMRobertaModel):
450
  save_peft_format: bool = True,
451
  **kwargs,
452
  ):
 
 
453
  for task_name, model_type in self.taskmodels_dict.items():
454
- model_type.save_pretrained(save_directory = Path(save_directory,task_name+"_model"),
 
455
  is_main_process = is_main_process,
456
  state_dict = state_dict,
457
  save_function = save_function,
@@ -463,46 +258,90 @@ class AlignScoreCS(transformers.XLMRobertaModel):
463
  save_peft_format = save_peft_format,
464
  **kwargs)
465
 
466
- # This piece of code is copied from AlignScore github repository
467
- # if you want to use different nlg_eval_mode you have to fix errors on your own
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
468
  class InferenceHandler:
469
def __init__(self, model, tokenizer, device="cuda"):
    """Wrap a model and tokenizer for AlignScore-paper style inference.

    Moves the model to ``device`` and switches it to eval mode. The defaults
    mirror the AlignScore repository: batch size 32, 'nli_sp' evaluation
    mode, 3-way head, non-verbose.
    """
    self.model = model
    self.tokenizer = tokenizer
    self.device = device
    # Inference only: place the model on the device and freeze eval behavior.
    self.model.to(self.device)
    self.model.eval()
    # AlignScore repository defaults.
    self.batch_size = 32
    self.nlg_eval_mode = "nli_sp"
    self.task_name = "3way"
    self.verbose = False
    self.softmax = nn.Softmax(dim=-1)
 
480
 
481
def nlg_eval(self, premise, hypo):
    """Score premise/hypo pairs; single strings are wrapped into lists.

    Delegates to ``inference_example_batch`` and returns its result.
    """
    if isinstance(premise, str) and isinstance(hypo, str):
        premise, hypo = [premise], [hypo]
    return self.inference_example_batch(premise, hypo)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
486
 
487
def inference_example_batch(self, premise: list, hypo: list):
    """Score each (premise, hypo) pair individually (SummaC-style aggregation).

    Each pair is delegated to ``inference_per_example``; the scalar results
    are stacked into a tensor. Disables the inner progress bar for the
    per-example calls.
    """
    self.disable_progress_bar_in_inference = True
    assert len(premise) == len(hypo), "Premise must has the same length with Hypothesis!"
    progress = tqdm(zip(premise, hypo), desc="Evaluating", total=len(premise), disable=(not self.verbose))
    out_score = [self.inference_per_example(one_pre, one_hypo) for one_pre, one_hypo in progress]
    return torch.tensor(out_score)
504
 
505
- def inference_per_example(self, premise:str, hypo: str):
506
  """
507
  inference a example,
508
  premise: string
@@ -522,34 +361,30 @@ class AlignScoreCS(transformers.XLMRobertaModel):
522
  premise_sents = [each for each in chunks(premise_sents, n_chunk)]
523
 
524
  hypo_sents = sent_tokenize(hypo)
 
 
 
 
525
 
526
  premise_sent_mat = []
527
  hypo_sents_mat = []
528
  for i in range(len(premise_sents)):
529
  for j in range(len(hypo_sents)):
530
  premise_sent_mat.append(premise_sents[i])
531
- hypo_sents_mat.append(hypo_sents[j])
532
-
533
- if self.nlg_eval_mode is not None:
534
- if self.nlg_eval_mode == 'nli_sp':
535
- output_score = self.inference(premise_sent_mat, hypo_sents_mat)[:,1] ### use NLI head OR ALIGN head
536
- output_score = output_score.view(len(premise_sents), len(hypo_sents)).max(dim=0).values.mean().item() ### sum or mean depends on the task/aspect
537
-
538
- return output_score
539
 
540
-
541
  output_score = self.inference(premise_sent_mat, hypo_sents_mat) ### use NLI head OR ALIGN head
542
- output_score = output_score.view(len(premise_sents), len(hypo_sents)).max(dim=0).values.mean().item() ### sum or mean depends on the task/aspect
543
-
 
 
544
  return output_score
545
 
546
- def inference(self, premise, hypo, task_name = None):
547
  """
548
  inference a list of premise and hypo
549
-
550
  Standard aggregation
551
  """
552
- task_name = self.task_name if task_name is None else task_name
553
  if isinstance(premise, str) and isinstance(hypo, str):
554
  premise = [premise]
555
  hypo = [hypo]
@@ -560,28 +395,23 @@ class AlignScoreCS(transformers.XLMRobertaModel):
560
  for mini_batch in tqdm(batch, desc="Evaluating", disable=not self.verbose or self.disable_progress_bar_in_inference):
561
  mini_batch = mini_batch.to(self.device)
562
  with torch.no_grad():
563
- model_output = self.model.forward(task_name=task_name, **mini_batch)
564
  model_output = model_output.logits
565
- if task_name == "re":
566
  model_output = model_output.cpu()
 
567
  else:
568
  model_output = self.softmax(model_output).cpu()
569
- output_score.append(model_output[:,:])
570
-
 
 
 
 
 
 
571
  output_score = torch.cat(output_score)
572
 
573
- if self.nlg_eval_mode is not None:
574
- if self.nlg_eval_mode == 'nli':
575
- output_score_nli = output_score[:,1]
576
- return output_score_nli
577
- elif self.nlg_eval_mode == 'bin':
578
- return output_score
579
- elif self.nlg_eval_mode == 'reg':
580
- return output_score
581
- else:
582
- ValueError("unrecognized nlg eval mode")
583
-
584
-
585
  return output_score
586
 
587
  def batch_tokenize(self, premise, hypo):
@@ -606,29 +436,24 @@ class AlignScoreCS(transformers.XLMRobertaModel):
606
  """Yield successive n-sized chunks from lst."""
607
  for i in range(0, len(lst), n):
608
  yield lst[i:i + n]
609
-
610
-
611
-
612
- if __name__ == "__main__":
613
- alignScore = AlignScoreCS.from_pretrained("krotima1/AlignScoreCS")
614
- alignScore.to("cuda" if torch.cuda.is_available() else "cpu")
615
-
616
- print("Tomáš miluje Zuzku!", "|", "Tomáš miluje Petru!",alignScore.score("Tomáš miluje Zuzku!", "Tomáš miluje Petru."))
617
- print("Tomáš miluje Zuzku!", "|", "Tomáš miluje Zuzku!",alignScore.score("Tomáš miluje Zuzku!", "Tomáš miluje Zuzku!"))
618
- print("Tomáš miluje Zuzku.", "|", "Zuzka miluje Tomáše.",alignScore.score("Tomáš miluje Zuzku!", "Zuzka miluje Tomáše."))
619
- print("Tomáš miluje Zuzku.", "|", "Zuzka nemiluje Tomáše.",alignScore.score("Tomáš miluje Zuzku!", "Zuzka nemiluje Tomáše."))
620
- print("Tomáš miluje Zuzku.", "|", "Tomáš nemiluje Zuzku.",alignScore.score("Tomáš miluje Zuzku!", "Tomáš nemiluje Zuzku."))
621
- print("Dva chlapi se perou.", "|", "Je tu bitka.",alignScore.score("Dva chlapi se perou.", "Je tu bitka."))
622
- print("Dva chlapi se perou.", "|", "Je tu láska.",alignScore.score("Dva chlapi se perou.", "Je tu láska."))
623
- print("Karel IV. je Otec vlasti. Nechal postavit katedrálu svatého Víta. \n Kdo nechal vystavět katedrálu?", "|", "Byl to Karel.",alignScore.score("Karel IV. je Otec vlasti. Nechal postavit katedrálu svatého Víta.\nKdo nechal vystavět katedrálu?", "Byl to Karel."))
624
- print("Karel IV. je Otec vlasti. Nechal postavit katedrálu svatého Víta. \n Kdo nechal vystavět katedrálu?", "|", "Byl to Vít.",alignScore.score("Karel IV. je Otec vlasti. Nechal postavit katedrálu svatého Víta.\nKdo nechal vystavět katedrálu?", "Byl to Vít."))
625
- print("Karel IV. je Otec vlasti. Nechal postavit katedrálu svatého Víta. \n Kdo nechal vystavět katedrálu?", "|", "Byla to katedrála.",alignScore.score("Karel IV. je Otec vlasti. Nechal postavit katedrálu svatého Víta.\nKdo nechal vystavět katedrálu?", "Byla to katedrála."))
626
- print("Kdo je Karel IV.? Karel IV. je Otec vlasti. Nechal postavit katedrálu svatého Víta.", "|", "Je Otec.",alignScore.score("Kdo je Karel IV.? Karel IV. je Otec vlasti. Nechal postavit katedrálu svatého Víta.", "Je Otec."))
627
- print("Kdo je Karel IV.? Karel IV. je Otec vlasti. Nechal postavit katedrálu svatého Víta.", "|", "Je Otec vlasti.",alignScore.score("Kdo je Karel IV.? Karel IV. je Otec vlasti. Nechal postavit katedrálu svatého Víta.", "Je Otec vlasti."))
628
- print("Kdo je Karel IV.? Karel IV. je Otec vlasti. Nechal postavit katedrálu svatého Víta.", "|", "Je katedrála svatého Víta.",alignScore.score("Kdo je Karel IV.? Karel IV. je Otec vlasti. Nechal postavit katedrálu svatého Víta.", "Je katedrála svatého Víta."))
629
- print("Karkulka šla do lesa. V lese potkala vlka. Vlk ji zkoušel sníst, ale Karkulka se nedala a Vlkovi utekla!", "|", "Karkulka utekla vklovi.",alignScore.score("Karkulka šla do lesa. V lese potkala vlka. Vlk ji zkoušel sníst, ale Karkulka se nedala a Vlkovi utekla!", "Karkulka utekla vklovi."))
630
- print("Karkulka šla do lesa. V lese potkala vlka. Vlk ji zkoušel sníst, ale Karkulka se nedala a Vlkovi utekla!", "|", "Karkulka neutekla vklovi.",alignScore.score("Karkulka šla do lesa. V lese potkala vlka. Vlk ji zkoušel sníst, ale Karkulka se nedala a Vlkovi utekla!", "Karkulka neutekla vklovi."))
631
- print("Karkulka šla do lesa. V lese potkala vlka. Vlk ji zkoušel sníst, ale Karkulka se nedala a Vlkovi utekla!", "|", "Vlk snědl Karkulku.",alignScore.score("Karkulka šla do lesa. V lese potkala vlka. Vlk ji zkoušel sníst, ale Karkulka se nedala a Vlkovi utekla!", "Vlk snědl karkulku."))
632
- print("Karkulka šla do lesa. V lese potkala vlka. Vlk ji zkoušel sníst, ale Karkulka se nedala a Vlkovi utekla!", "|", "Vlk nesnědl Karkulku.",alignScore.score("Karkulka šla do lesa. V lese potkala vlka. Vlk ji zkoušel sníst, ale Karkulka se nedala a Vlkovi utekla!", "Vlk nesnědl karkulku."))
633
- print("Karkulka šla do lesa. V lese potkala vlka. Vlk ji zkoušel sníst, ale Karkulka se nedala a Vlkovi utekla!", "|", "Karkulka snědla vlka.",alignScore.score("Karkulka šla do lesa. V lese potkala vlka. Vlk ji zkoušel sníst, ale Karkulka se nedala a Vlkovi utekla!", "Karkulka snědla vlka."))
634
- print("Karkulka šla do lesa. V lese potkala vlka. Vlk ji zkoušel sníst, ale Karkulka se nedala a Vlkovi utekla!", "|", "Karkulka dala vlkovi jablko.",alignScore.score("Karkulka šla do lesa. V lese potkala vlka. Vlk ji zkoušel sníst, ale Karkulka se nedala a Vlkovi utekla!", "Karkulka dala vlkovi jablko."))
 
6
  from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple, Union
7
  import torch.nn as nn
8
  import torch
9
+ from nltk.tokenize import sent_tokenize #If you don't have nltk, you can use another sentence tokenizer
 
10
  from tqdm import tqdm
11
 
12
  class AlignScoreCS(transformers.XLMRobertaModel):
13
  """
14
+ AlignScoreCS class
 
15
  Description:
16
  Model ALIGNSCORECS has been trained according to the paper for 3 days on 4GPUs AMD NVIDIA.
17
  (3 epochs, 1e-5 learning rate, 1e-6 AdamWeps, batchsize 32, WarmupRatio 0.06, 0.1 WeightDecay)
18
+ - XLMROBERTA-large model with 3 classification HEAD {regression,binary,3way} using shared encoder
19
+ - trained on 7M docs incorporating various NLP tasks (QA,STS,Summarization,FactVer,InforRetrievel,NLI,Paraphrase..)
20
+ - English and Czech translated datasets
21
+
22
+ TRY: .show_examples() to see some examples
23
 
24
  USAGE: AlignScore.py
25
+ - .from_pretrained - loads the model, usage as transformers.model
26
+
27
  - .score(context, claim) - function
28
  - returns probs of the ALIGNED class using 3way class head as in the paper.
29
+
30
+ - .classify(context, claim) - function
31
+ - returns predicted class using bin class head as in the paper.
32
+
33
  alignScoreCS = AlignScoreCS.from_pretrained("/mnt/data/factcheck/AlignScore-data/AAmodel/MTLModel/mo
34
  alignScoreCS.score(context,claim)
35
+
36
  If you want to try different classification head use parameter:
37
  - task_name = "re" : regression head
38
  - task_name = "bin" : binary classification head
39
  - task_name = "3way" : 3way classification head
 
40
  """
41
  _regression_model = "re_model"
42
  _binary_class_model = "bin_model"
 
46
  super().__init__(transformers.XLMRobertaConfig(), **kwargs)
47
  self.encoder = encoder
48
  self.taskmodels_dict = nn.ModuleDict(taskmodels_dict)
 
49
  self.model_name = model_name
50
+ self.tokenizer = None
51
  self.inferencer = None
52
+ self.init_inferencer(device = "cpu")
53
+
54
 
55
  def init_inferencer(self, device = "cuda"):
56
  self.tokenizer = transformers.AutoTokenizer.from_pretrained(self.model_name) if not self.tokenizer else self.tokenizer
57
  self.inferencer = self.InferenceHandler(self, self.tokenizer, device)
58
 
 
 
59
  """
60
+ Score: scores the context and claim with Aligned probability of given classification head
61
+ - using altered code inferencer from ALignScore
62
+
63
+ context : list or str
64
+ claim : list or str
65
+ eval_mode : {nli, bin, re, nli_sp, bin_sp or re_sp}
66
+ nli - 3way head
67
+ bin - 2way head
68
+ re - regression head
69
+ (sp - indicates whether to apply the AlignScore function: chunk the context and split the claim into sentences;
70
+ otherwise it truncates the text and returns probability of Aligned from that)
71
+ eval_question_answer : list or str representing question if you want to evaluate context-answer question
72
+ DEFAULT: nli_sp
73
+ Returns the consistency score (probability of Aligned class of 3-way head) between context text and claim text
74
+ - using 2way classification head
75
 
76
  """
77
+ def score(self, context, claim, eval_mode = "nli_sp", eval_question_answer = None, **kwargs):
78
+
79
+ scores = self.inferencer.nlg_eval(context, claim, eval_mode=eval_mode, question = eval_question_answer)
 
80
  return scores
81
+
82
 
83
  """
84
+ Classify: classify the context and claim to the class label given the eval model
85
+ context : list or str
86
+ claim : list or str
87
+ eval mode : {nli, bin, re, nli_sp, bin_sp or re_sp}
88
+ nli - 3way head
89
+ bin - 2way head
90
+ re - regression head
91
+ (sp - indicates whether to apply the AlignScore classification function: chunk the context and split the claim into sentences;
92
+ otherwise it truncates the text and returns predicted class)
93
+ DEFAULT: bin_sp
94
+ Returns the class of {Contradict, Aligned} between context text and claim text
95
+ - using 2way classification head
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
  """
97
+ def classify(self, context, claim, eval_mode = "bin_sp", **kwargs):
98
+ eval_mode = eval_mode+"_cls" if ("cls" not in eval_mode) and ("class" not in eval_mode) else eval_mode
99
+ scores = self.inferencer.nlg_eval(context, claim, eval_mode=eval_mode)
100
+ return scores
101
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
 
103
  def forward(self, task_name = "3way", **kwargs):
104
  return self.taskmodels_dict[task_name](**kwargs)
105
 
106
  def __call__(self, task_name, **kwargs):
107
  return self.taskmodels_dict[task_name](**kwargs)
 
 
 
 
 
 
 
 
 
108
 
 
 
 
 
 
 
 
 
 
 
 
109
 
110
+ def to(self, device, **kwargs):
111
+ self.init_inferencer(device = device)
112
+ return super().to(device)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
 
 
 
114
 
115
+ return self
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  @classmethod
118
  def get_encoder_attr_name(cls, model):
119
  """
 
125
  return "roberta"
126
  else:
127
  raise KeyError(f"Add support for new model {model_class_name}")
128
+ """
129
+ pretrained_model_name_or_path :str "krotima1/AlignScoreCS" // it is also possible to use another NLI model, but then specify load_specific_head as 3way
130
+ - path to the directory of AlignScoreCS
131
+ - or pass "build_new" to create new multitask AlignScore architecture.
132
+ load_specific_head :str ["re", "bin", "3way"] or None // use this, and it will load only one architecture
133
+ load_another_model
134
+ """
135
  @classmethod
136
  def from_pretrained(
137
  cls,
138
  pretrained_model_name_or_path: Optional[Union[str, os.PathLike]],
139
  model_name : str = "xlm-roberta-large",
140
+ load_specific_head = None,
141
  *model_args,
142
  config: Optional[Union[PretrainedConfig, str, os.PathLike]] = None,
143
  cache_dir: Optional[Union[str, os.PathLike]] = None,
 
149
  use_safetensors: bool = None,
150
  **kwargs,
151
  ):
152
+
153
+ architectures = [cls._3way_class_model, cls._regression_model, cls._binary_class_model] if load_specific_head is None else {"re": [cls._regression_model], "bin": [cls._binary_class_model], "3way" : [cls._3way_class_model]}[load_specific_head]
154
+ is_3way_included = "3way" == load_specific_head or load_specific_head is None
155
+
156
+ # DEPRECATED = it is here only because of loading previous versions... load from file
157
+ if all(os.path.exists(os.path.join(pretrained_model_name_or_path, model_dir)) for model_dir in architectures):
158
+
159
+ # Disables the warning
160
  transformers.logging.set_verbosity_error()
161
 
162
  shared_encoder = None
163
  taskmodels_dict = {}
164
+ for path_name in tqdm(architectures, desc='DEPRECATED: Loading architectures from a local directory'):
165
  task_name = path_name.split("_")[0]
166
 
167
  # Load the configuration for the task-specific model
 
178
  taskmodels_dict[task_name] = model
179
 
180
  # Create the AlignScoreCS with the shared encoder and loaded task-specific models
181
+ return AlignScoreCS(encoder=shared_encoder, taskmodels_dict=taskmodels_dict, model_name=model_name)
182
+ # UP TO DATE LOADING FROM FILE:
183
+ if all(os.path.exists(os.path.join(pretrained_model_name_or_path, {"3way_model" : "pytorch_model.bin"}.get(model_dir,model_dir))) for model_dir in architectures):
184
  shared_encoder = None
185
  taskmodels_dict = {}
186
+ for path_name in tqdm(architectures, desc='Loading architectures from a local directory'):
187
+ task_name = path_name.split("_")[0]
188
+ load_path = pretrained_model_name_or_path if task_name == "3way" else "{}/{}".format(pretrained_model_name_or_path,path_name)
189
+ task_config = transformers.XLMRobertaConfig.from_json_file("{}/config.json".format(load_path))
190
+ model = transformers.XLMRobertaForSequenceClassification.from_pretrained("{}".format(load_path), config=task_config,*model_args,**kwargs)
191
  if shared_encoder is None:
192
  shared_encoder = getattr(model, AlignScoreCS.get_encoder_attr_name(model))
193
  else:
194
  setattr(model, AlignScoreCS.get_encoder_attr_name(model), shared_encoder)
195
  taskmodels_dict[task_name] = model
196
+ return AlignScoreCS(encoder=shared_encoder, taskmodels_dict=taskmodels_dict, model_name=model_name)
197
+ # BUILD NEW AlignScoreCS
198
+ if pretrained_model_name_or_path == "build_new":
199
+ shared_encoder = None
200
+ taskmodels_dict = {}
201
+ for path_name in tqdm([cls._3way_class_model, cls._regression_model, cls._binary_class_model], desc=f'Building new architectures from {model_name}'):
202
+ task_name = path_name.split("_")[0]
203
+ task_config = transformers.XLMRobertaConfig.from_pretrained(model_name)
204
+ model = transformers.XLMRobertaForSequenceClassification.from_pretrained(model_name, config=task_config,*model_args,**kwargs)
205
+ if shared_encoder is None:
206
+ shared_encoder = getattr(model, AlignScoreCS.get_encoder_attr_name(model))
207
+ else:
208
+ setattr(model, AlignScoreCS.get_encoder_attr_name(model), shared_encoder)
209
+ taskmodels_dict[task_name] = model
210
+ return AlignScoreCS(encoder=shared_encoder, taskmodels_dict=taskmodels_dict, model_name=model_name)
211
+
212
+ #LOADING FROM HUGGINGFACE HUB
213
+ shared_encoder = None
214
+ taskmodels_dict = {}
215
+ for model_dir in tqdm(architectures, desc='Loading from huggingface HUB'):
216
+ task_name = model_dir.split("_")[0]
217
+ subfolder = '' if task_name == "3way" else model_dir
218
+ config = transformers.XLMRobertaConfig.from_pretrained(f"{pretrained_model_name_or_path}", subfolder=subfolder)
219
+ model = transformers.XLMRobertaForSequenceClassification.from_pretrained(f"{pretrained_model_name_or_path}",config=config, subfolder=subfolder)
220
+ if shared_encoder is None:
221
+ shared_encoder = getattr(model, AlignScoreCS.get_encoder_attr_name(model))
222
+ else:
223
+ setattr(model, AlignScoreCS.get_encoder_attr_name(model), shared_encoder)
224
+ taskmodels_dict[task_name] = model
225
+ alignScoreCS = AlignScoreCS(encoder=shared_encoder, taskmodels_dict=taskmodels_dict, model_name=model_name)
226
  return alignScoreCS
227
 
228
+ """
229
+ This saves the architectures into the directory. Model with 3way head is in the main dir, while bin and reg are in subfolders (bin_model, re_model).
230
+ """
231
  def save_pretrained(
232
  self,
233
  save_directory: Union[str, os.PathLike],
 
242
  save_peft_format: bool = True,
243
  **kwargs,
244
  ):
245
+ #It would be awesome to rewrite this to save only the classifier's head from taskmodels_dict and one encoder instead of 3x same encoder.
246
+ #But who cares - only those who need to save storage
247
  for task_name, model_type in self.taskmodels_dict.items():
248
+ final_directory = Path(save_directory,task_name+"_model") if task_name in ["re", "bin"] else Path(save_directory)
249
+ model_type.save_pretrained(save_directory = final_directory,
250
  is_main_process = is_main_process,
251
  state_dict = state_dict,
252
  save_function = save_function,
 
258
  save_peft_format = save_peft_format,
259
  **kwargs)
260
 
261
+ """
262
+ This piece of code is copied and modified from AlignScore github code from: https://github.com/yuh-zha/AlignScore/blob/main/src/alignscore/inference.py
263
+ ### eval_mode ####
264
+ ## sp ## means chunk context to roughly 300 tokens and split claim into sentences then apply AlignScore function to get consistency score
265
+ - nli_sp - ## DEFAULT ## returns consistency score of Aligned class from 3way head given context and claim using chunking
266
+ - bin_sp - returns consistency score of Aligned class from 2way head given context and claim using chunking
267
+ - re_sp - returns output of regression head given context and claim using chunking.
268
+ ## simple ## without chunking and splitting
269
+ - nli - returns probability of Aligned class from 3way head given context and claim.
270
+ - bin - returns probability of Aligned class from 2way head given context and claim.
271
+ - re - returns output of regression head given context and claim.
272
+ ## sp_cls ## means chunk context to roughly 300 tokens and split claim into sentences then apply AlignScore Classification function to get class
273
+ - nli_sp_cls - returns class from 3way head given context and claim using chunking
274
+ - bin_sp_cls - returns class from 2way head given context and claim using chunking
275
+ - re_sp_cls - returns output from regression head given context and claim using chunking
276
+ ## simple ## without chunking and splitting
277
+ - nli_cls - returns class of Aligned class from 3way head given context and claim.
278
+ - bin_cls - returns class from 2way head given context and claim.
279
+ - re_cls - returns output of regression head given context and claim.
280
+ """
281
  class InferenceHandler:
282
+ def __init__(self, model, tokenizer, device = "cuda", batch_size = 32, verbose = False):
283
+ # self.position_of_aligned_class = {"3way" : 1, "bin" : 1}
284
+ self.input_evalmode_handler = {"3way_sp" : "nli_sp", "3way_sp_class" : "nli_sp_cls", "3way" : "nli", "3-way" : "nli", "3way_class" : "nli_cls",
285
+ "2way_sp" : "bin_sp", "2way_sp_class" : "bin_sp_cls", "2way" : "bin", "2-way" : "bin", "2way_class" : "bin_cls",
286
+ "reg_sp" : "re_sp", "reg_sp_class" : "re_sp_cls", "reg" : "re", "reg_class" : "re_cls"}
287
+ self.taskname_handler = lambda eval_mode: "3way" if "nli" in eval_mode else ("bin" if "bin" in eval_mode else "re")
288
+ #DEFAULT
289
+ self.nlg_eval_mode = "nli_sp"
290
+ self.task_name = "3way"
291
+
292
+ #Model setup
293
  self.model = model
294
  self.device = device
295
  self.tokenizer = tokenizer
296
+ # self.model.to(self.device)
297
  self.model.eval()
298
+
299
+ self.batch_size = batch_size
300
+ self.verbose = verbose
301
+
302
  self.softmax = nn.Softmax(dim=-1)
303
+
304
 
305
+ def nlg_eval(self, premise, hypo, eval_mode = "nli_sp", question = None):
306
+
307
  if isinstance(premise, str) and isinstance(hypo, str):
308
  premise = [premise]
309
  hypo = [hypo]
310
+ if (isinstance(question,str)):
311
+ question = [question]
312
+
313
+ if question is None:
314
+ question = [None]*len(premise)
315
+
316
+ #setup
317
+ self.nlg_eval_mode = self.input_evalmode_handler.get(eval_mode, eval_mode)
318
+ self.task_name = self.taskname_handler(self.nlg_eval_mode)
319
+ assert self.nlg_eval_mode in set(self.input_evalmode_handler.values()), f"eval_mode is wrong {self.nlg_eval_mode}, use please : nli_sp or any other, look at the comments."
320
+
321
+ if "sp" in self.nlg_eval_mode:
322
+ return self.inference_example_batch(premise, hypo, question)
323
+ elif "sp" not in self.nlg_eval_mode:
324
+ return self.inference(premise, hypo)
325
+ return None
326
 
327
+ def inference_example_batch(self, premise: list, hypo: list, question : list):
328
  """
329
  inference a example,
330
  premise: list
331
  hypo: list
332
  using self.inference to batch the process
 
333
  SummaC Style aggregation
334
  """
335
  self.disable_progress_bar_in_inference = True
336
  assert len(premise) == len(hypo), "Premise must has the same length with Hypothesis!"
337
 
338
  out_score = []
339
+ for one_pre, one_hypo, one_quest in tqdm(zip(premise, hypo, question), desc="Evaluating", total=len(premise), disable=(not self.verbose)):
340
+ out_score.append(self.inference_per_example(one_pre, one_hypo, one_quest))
341
 
342
  return torch.tensor(out_score)
343
 
344
+ def inference_per_example(self, premise:str, hypo: str, quest = None):
345
  """
346
  inference a example,
347
  premise: string
 
361
  premise_sents = [each for each in chunks(premise_sents, n_chunk)]
362
 
363
  hypo_sents = sent_tokenize(hypo)
364
+
365
+ #add question to each sentence
366
+ if quest is not None:
367
+ hypo_sents = [quest+" "+ sent for sent in hypo_sents]
368
 
369
  premise_sent_mat = []
370
  hypo_sents_mat = []
371
  for i in range(len(premise_sents)):
372
  for j in range(len(hypo_sents)):
373
  premise_sent_mat.append(premise_sents[i])
374
+ hypo_sents_mat.append(hypo_sents[j])
 
 
 
 
 
 
 
375
 
 
376
  output_score = self.inference(premise_sent_mat, hypo_sents_mat) ### use NLI head OR ALIGN head
377
+ if "cls" in self.nlg_eval_mode:
378
+ output_score = output_score.view(len(premise_sents), len(hypo_sents),-1).mean(1).mean(0).argmax().item()
379
+ else:
380
+ output_score = output_score.view(len(premise_sents), len(hypo_sents)).max(dim=0).values.mean().item() ### sum or mean depends on the task/aspect
381
  return output_score
382
 
383
+ def inference(self, premise, hypo):
384
  """
385
  inference a list of premise and hypo
 
386
  Standard aggregation
387
  """
 
388
  if isinstance(premise, str) and isinstance(hypo, str):
389
  premise = [premise]
390
  hypo = [hypo]
 
395
  for mini_batch in tqdm(batch, desc="Evaluating", disable=not self.verbose or self.disable_progress_bar_in_inference):
396
  mini_batch = mini_batch.to(self.device)
397
  with torch.no_grad():
398
+ model_output = self.model.forward(task_name=self.task_name, **mini_batch)
399
  model_output = model_output.logits
400
+ if self.task_name == "re":
401
  model_output = model_output.cpu()
402
+ model_output = model_output[:,0]
403
  else:
404
  model_output = self.softmax(model_output).cpu()
405
+ if "cls" in self.nlg_eval_mode:
406
+ model_output = model_output
407
+ if "sp" not in self.nlg_eval_mode:
408
+ model_output = model_output.argmax(-1)
409
+ else:
410
+ model_output = model_output[:,1]
411
+
412
+ output_score.append(model_output)
413
  output_score = torch.cat(output_score)
414
 
 
 
 
 
 
 
 
 
 
 
 
 
415
  return output_score
416
 
417
  def batch_tokenize(self, premise, hypo):
 
436
  """Yield successive n-sized chunks from lst."""
437
  for i in range(0, len(lst), n):
438
  yield lst[i:i + n]
439
+
440
+ def show_examples(self):
441
+ self.to("cuda" if torch.cuda.is_available() else "cpu")
442
+ contexts = ["Jaromír Jágr (68) střelil poslední gól sezóny do branky Dominika Haška. Davy šílely dokonce i po celém zápase."]
443
+ claims = ["Dav šílel, když Jarda (68) střelil gól.", "Dav šílel, když Jarda (78) střelil gól.", "Dav šílel jen při zápase, když Jarda (68) střelil gól.", "Dominik Hašek nedokázal chytit poslední střelu od Jágra.",
444
+ "Dominik Jágr (68) střelil poslední gól sezóny do branky Jaromíra Haška.", "Dominik Jágr (68) střelil poslední gól sezóny do branky Dominika Haška.", "Jaromír jágr nestřelil gól v sezóně.",
445
+ "Davy šílely, když střelily gól do branky Dominika Haška.","Davy šílely, když davy střelily gól do branky Dominika Haška.", "Dav šílel. Jarda střelil gól.", "Dav šílel. Jarda nestřelil gól.",
446
+ "Dneska odevzdávám diplomovou práci a koukám na hokej.", "Téma pojednává o hokeji", "Téma pojednává o baletu", "Dominik hašek je brankář", "Dominik hašek je útočník", "Jaromír Jágr je střelec", "Jaromír Jágr je hokejový útočník",
447
+ "Jaromír Jágr je hokejový brankář", "Na utkání se dívaly davy lidí, které byly potichu.", "Na utkání se dívaly davy lidí, které šílely."]
448
+ print("EXAMPLES:")
449
+ print("context:",contexts[0])
450
+ print("SCORE: ", "claims:")
451
+ for co, cl in zip(contexts*len(claims),claims):
452
+ print(round(self.score(co,cl,eval_mode="nli_sp").tolist()[0],5),cl)
453
+ print("EXAMPLES QA:")
454
+ print("SCORE: ", "q-a pairs:")
455
+ claims = [("Kdo střelil gól?", "Jaromír Jágr."), ("Kdo střelil gól?", "Domink Hašek."), ("Kdo nechytil střelu?", "Jaromír Jágr."), ("Kdo nechytil střelu?", "Domink Hašek.")
456
+ , ("Jaký číslo drezu Jaromír Jágr?", "Jaromír Jágr číslo drezu 68."), ("Kolik je Jaromíru Jágrovi let?", "Jaromíru Jágrovi je 68."), ("Kolik je Jaromíru Jágrovi let?", "Jaromíru Jágrovi je 67.")
457
+ , ("Co udělali lidi, když Jágr střelil gól?", "Lidi začali šílet. Dokonce šílely i po zápase."), ("Co udělali lidi, když Jágr střelil gól?", "Šli dát góla Haškovi")]
458
+ for co,cl in zip(contexts*len(claims),claims):
459
+ print(round(model.score(co, cl[1],eval_mode="nli_sp",eval_question_answer=cl[0] ).tolist()[0],5)," ".join(cl))