Zhengping committed on
Commit
2d5275c
·
verified ·
1 Parent(s): e6945e3

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +49 -12
README.md CHANGED
@@ -273,7 +273,44 @@ class LevelToScorePipeline(TextGenerationPipeline):
273
  return records
274
 
275
 
276
- from src.rank_dicts import SingleLabelRankDict
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
277
 
278
 
279
  model = transformers.AutoModelForCausalLM.from_pretrained(
@@ -327,17 +364,17 @@ premise = "Sam is sleeping."
327
  hypothesis = "Sam is awake."
328
 
329
  inputs = [
330
- {
331
- "role": "user",
332
- "content": "### Question: Given the premise \"{premise}\", how likely is it that the hypothesis \"{hypothesis}\" is true?\n\n".format(
333
- premise=premise,
334
- hypothesis=hypothesis
335
- )
336
- },
337
- {
338
- "role": "assitant",
339
- "content": "### Answer:"
340
- }
341
  ]
342
 
343
  result = pipe(inputs)
 
273
  return records
274
 
275
 
276
class SingleLabelRankDict:
    """Maps label tokens (e.g. " <|label_level_3|>") to scalar rank values.

    Keys of the underlying dict are tokenizer vocabulary tokens; values are
    the score assigned to each label level.
    """

    def __init__(
        self,
        rank_dict: Dict[Text, Any]
    ):
        # token -> rank value; kept private, exposed via the methods below.
        self._rank_dict = rank_dict

    def __len__(self) -> int:
        """Return the number of label tokens tracked."""
        return len(self._rank_dict)

    def get_rank_dict(self, tokenizer: "PreTrainedTokenizer") -> Dict[int, Any]:
        """Return the rank dict keyed by token id instead of token string."""
        return {
            tokenizer.convert_tokens_to_ids([token])[0]: value
            for token, value in self._rank_dict.items()
        }

    def to_tokenizer(self, tokenizer: "PreTrainedTokenizer") -> "PreTrainedTokenizer":
        """Augment tokenizer vocab with `rank_dict` tokens IN-PLACE.

        Only tokens not already present in the tokenizer vocabulary are added.
        """
        # BUG FIX: the original annotated `self._rank_dict.keys()` (a dict
        # view) as List[Text]; make the annotation truthful. Also hoist
        # get_vocab() out of the membership test so it runs once, not per token.
        vocabs: List[Text] = list(self._rank_dict.keys())
        existing_vocab = tokenizer.get_vocab()
        new_vocab = [vocab for vocab in vocabs if vocab not in existing_vocab]
        tokenizer.add_tokens(new_vocab)
        return tokenizer

    @classmethod  # BUG FIX: original used `cls` but was missing the decorator.
    def from_tokenizer(cls, tokenizer: "PreTrainedTokenizer") -> "SingleLabelRankDict":
        """Build a SingleLabelRankDict from label tokens found in `tokenizer`.

        Tokens matching " <|label_level_K|>" are collected and K is normalized
        to the midpoint of the K-th of `num_levels` equal-width bins in (0, 1).

        Raises:
            ValueError: if the tokenizer vocabulary has no label-level tokens.
        """
        vocab = tokenizer.get_vocab()
        rank_dict = {}
        # NOTE(review): the leading space assumes label tokens are stored with
        # a space prefix in the vocab — confirm against how they were added.
        pattern = re.compile(r" <\|label_level_(\d+)\|>")

        for token in vocab:
            match = pattern.match(token)
            if match:
                rank_dict[token] = int(match.group(1))

        if not rank_dict:
            # ROBUSTNESS: the original crashed with an opaque
            # "max() arg is an empty sequence" ValueError here.
            raise ValueError("Tokenizer vocabulary contains no label-level tokens.")

        # Normalize rank values: level k -> (k + 0.5) / num_levels.
        num_levels = max(rank_dict.values()) + 1
        for token in rank_dict:
            rank_dict[token] = 1. / num_levels * (rank_dict[token] + 0.5)

        return cls(rank_dict=rank_dict)
314
 
315
 
316
  model = transformers.AutoModelForCausalLM.from_pretrained(
 
364
  hypothesis = "Sam is awake."
365
 
366
  inputs = [
367
+ {
368
+ "role": "user",
369
+ "content": "### Question: Given the premise \"{premise}\", how likely is it that the hypothesis \"{hypothesis}\" is true?\n\n".format(
370
+ premise=premise,
371
+ hypothesis=hypothesis
372
+ )
373
+ },
374
+ {
375
+ "role": "assistant",
376
+ "content": "### Answer:"
377
+ }
378
  ]
379
 
380
  result = pipe(inputs)