Update README.md
Browse files
README.md
CHANGED
|
@@ -273,7 +273,44 @@ class LevelToScorePipeline(TextGenerationPipeline):
|
|
| 273 |
return records
|
| 274 |
|
| 275 |
|
| 276 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 277 |
|
| 278 |
|
| 279 |
model = transformers.AutoModelForCausalLM.from_pretrained(
|
|
@@ -327,17 +364,17 @@ premise = "Sam is sleeping."
|
|
| 327 |
hypothesis = "Sam is awake."
|
| 328 |
|
| 329 |
inputs = [
|
| 330 |
-
|
| 331 |
-
|
| 332 |
-
|
| 333 |
-
|
| 334 |
-
|
| 335 |
-
|
| 336 |
-
|
| 337 |
-
|
| 338 |
-
|
| 339 |
-
|
| 340 |
-
|
| 341 |
]
|
| 342 |
|
| 343 |
result = pipe(inputs)
|
|
|
|
| 273 |
return records
|
| 274 |
|
| 275 |
|
| 276 |
+
class SingleLabelRankDict:
    """Maps label-level tokens (e.g. " <|label_level_3|>") to rank scores.

    Built either directly from a token->value mapping or extracted from a
    tokenizer's vocabulary via `from_tokenizer`.
    """

    def __init__(
        self,
        rank_dict: Dict[Text, Any]
    ):
        # Mapping from label token text to its (possibly normalized) rank value.
        self._rank_dict = rank_dict

    def __len__(self) -> int:
        """Return the number of label tokens tracked."""
        return len(self._rank_dict)

    def get_rank_dict(self, tokenizer: "PreTrainedTokenizer") -> Dict[int, Any]:
        """Return the rank dict keyed by token id instead of token text."""
        return {
            tokenizer.convert_tokens_to_ids([token])[0]: value
            for token, value in self._rank_dict.items()
        }

    def to_tokenizer(self, tokenizer: "PreTrainedTokenizer") -> "PreTrainedTokenizer":
        """Augment tokenizer vocab with `rank_dict` IN-PLACE.

        Only tokens not already present in the vocab are added; the mutated
        tokenizer is also returned for convenience.
        """
        # BUG FIX: dict.keys() returns a view, not a List — materialize it
        # so the annotation is truthful and the value is a real list.
        vocabs: List[Text] = list(self._rank_dict.keys())
        new_vocab = [vocab for vocab in vocabs if vocab not in tokenizer.get_vocab()]
        tokenizer.add_tokens(new_vocab)
        return tokenizer

    @classmethod  # BUG FIX: method takes `cls` but was missing the decorator
    def from_tokenizer(cls, tokenizer: "PreTrainedTokenizer") -> "SingleLabelRankDict":
        """Build a rank dict from label tokens found in the tokenizer's vocab.

        Rank values are normalized to (0, 1) as the midpoint of each
        level's bucket: (rank + 0.5) / num_levels.
        """
        vocab = tokenizer.get_vocab()
        rank_dict = {}
        # NOTE(review): pattern expects a leading space before the label
        # token — confirm this matches how the tokenizer stores them.
        pattern = re.compile(r" <\|label_level_(\d+)\|>")

        for token in vocab.keys():
            match = pattern.match(token)
            if match:
                rank_dict[token] = int(match.group(1))

        # ROBUSTNESS: no matching tokens would make max() raise ValueError;
        # return an empty mapping instead.
        if not rank_dict:
            return cls(rank_dict={})

        # Normalize rank values; assumes levels are contiguous 0..max.
        num_levels = max(rank_dict.values()) + 1
        for token in rank_dict.keys():
            rank_dict[token] = 1. / num_levels * (rank_dict[token] + 0.5)

        return cls(rank_dict=rank_dict)
|
| 314 |
|
| 315 |
|
| 316 |
model = transformers.AutoModelForCausalLM.from_pretrained(
|
|
|
|
| 364 |
hypothesis = "Sam is awake."
|
| 365 |
|
| 366 |
# Chat-style input for the pipeline: a user question built from the premise
# and hypothesis, followed by an assistant turn primed with the answer header.
inputs = [
    {
        "role": "user",
        "content": "### Question: Given the premise \"{premise}\", how likely is it that the hypothesis \"{hypothesis}\" is true?\n\n".format(
            premise=premise,
            hypothesis=hypothesis
        )
    },
    {
        # BUG FIX: role was misspelled "assitant" — chat templates dispatch
        # on the exact role string, so the typo breaks the example.
        "role": "assistant",
        "content": "### Answer:"
    }
]

result = pipe(inputs)
|