Zhengping committed on
Commit e6945e3 · verified · 1 Parent(s): 6b2d1c9

Update README.md

Files changed (1)
  1. README.md +222 -4
README.md CHANGED
@@ -50,23 +50,230 @@ This is the model card of a 🤗 transformers model that has been pushed on the
 <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
 
 ```python
+import enum
 import transformers
 import torch
 from transformers.pipelines import PIPELINE_REGISTRY
 from transformers import (
     pipeline,
+    Pipeline,
+    TextGenerationPipeline,
+    PreTrainedTokenizer,
     AutoModelForCausalLM,
     PreTrainedTokenizer
 )
+from transformers.pipelines.text_generation import Chat, ReturnType
 from typing import (
     Dict,
     Callable,
     Tuple,
     List,
 )
-from src.pipelines.level_to_score_pipeline import LevelToScorePipeline
+
+
+class LevelToScorePipeline(TextGenerationPipeline):
+
+    def __init__(
+        self,
+        level_to_score_func: Callable[[Tuple[torch.FloatTensor], PreTrainedTokenizer], Tuple[List[float], List[List[float]]]],
+        *args,
+        **kwargs
+    ):
+        super().__init__(*args, **kwargs)
+        self._level_to_score_func = level_to_score_func
+
+    def preprocess(
+        self,
+        prompt_text,
+        prefix="",
+        handle_long_generation=None,
+        add_special_tokens=None,
+        truncation=None,
+        padding=None,
+        max_length=None,
+        continue_final_message=None,
+        **generate_kwargs,
+    ):
+        # Only set non-None tokenizer kwargs, so as to rely on the tokenizer's defaults
+        tokenizer_kwargs = {
+            "add_special_tokens": add_special_tokens,
+            "truncation": truncation,
+            "padding": padding,
+            "max_length": max_length,
+        }
+        tokenizer_kwargs = {key: value for key, value in tokenizer_kwargs.items() if value is not None}
+
+        if isinstance(prompt_text, Chat):
+            tokenizer_kwargs.pop("add_special_tokens", None)  # ignore add_special_tokens on chats
+            # If the user passes a chat that ends in an assistant message, we treat it as a prefill by default
+            # because very few models support multiple separate, consecutive assistant messages
+            if continue_final_message is None:
+                continue_final_message = prompt_text.messages[-1]["role"] == "assistant"
+            inputs = self.tokenizer.apply_chat_template(
+                prompt_text.messages,
+                add_generation_prompt=not continue_final_message,
+                continue_final_message=continue_final_message,
+                return_dict=True,
+                return_tensors=self.framework,
+                **tokenizer_kwargs,
+            )
+        else:
+            inputs = self.tokenizer(prefix + prompt_text, return_tensors=self.framework, **tokenizer_kwargs)
+
+        inputs["prompt_text"] = prompt_text
+
+        if handle_long_generation == "hole":
+            cur_len = inputs["input_ids"].shape[-1]
+            if "max_new_tokens" in generate_kwargs:
+                new_tokens = generate_kwargs["max_new_tokens"]
+            else:
+                new_tokens = generate_kwargs.get("max_length", self.generation_config.max_length) - cur_len
+                if new_tokens < 0:
+                    raise ValueError("We cannot infer how many new tokens are expected")
+            if cur_len + new_tokens > self.tokenizer.model_max_length:
+                keep_length = self.tokenizer.model_max_length - new_tokens
+                if keep_length <= 0:
+                    raise ValueError(
+                        "We cannot use `hole` to handle this generation the number of desired tokens exceeds the"
+                        " models max length"
+                    )
+
+                inputs["input_ids"] = inputs["input_ids"][:, -keep_length:]
+                if "attention_mask" in inputs:
+                    inputs["attention_mask"] = inputs["attention_mask"][:, -keep_length:]
+
+        return inputs
+
+    def _forward(self, model_inputs, **generate_kwargs):
+        input_ids = model_inputs["input_ids"]
+        attention_mask = model_inputs.get("attention_mask", None)
+        # Allow empty prompts
+        if input_ids.shape[1] == 0:
+            input_ids = None
+            attention_mask = None
+            in_b = 1
+        else:
+            in_b = input_ids.shape[0]
+        prompt_text = model_inputs.pop("prompt_text")
+
+        # If there is a prefix, we may need to adjust the generation length. Do so without permanently modifying
+        # generate_kwargs, as some of the parameterization may come from the initialization of the pipeline.
+        prefix_length = generate_kwargs.pop("prefix_length", 0)
+        if prefix_length > 0:
+            has_max_new_tokens = "max_new_tokens" in generate_kwargs or (
+                "generation_config" in generate_kwargs
+                and generate_kwargs["generation_config"].max_new_tokens is not None
+            )
+            if not has_max_new_tokens:
+                generate_kwargs["max_length"] = generate_kwargs.get("max_length") or self.generation_config.max_length
+                generate_kwargs["max_length"] += prefix_length
+            has_min_new_tokens = "min_new_tokens" in generate_kwargs or (
+                "generation_config" in generate_kwargs
+                and generate_kwargs["generation_config"].min_new_tokens is not None
+            )
+            if not has_min_new_tokens and "min_length" in generate_kwargs:
+                generate_kwargs["min_length"] += prefix_length
+
+        # User-defined `generation_config` passed to the pipeline call take precedence
+        if "generation_config" not in generate_kwargs:
+            generate_kwargs["generation_config"] = self.generation_config
+
+        generate_kwargs["output_scores"] = not generate_kwargs.get("do_sample", False)
+        generate_kwargs["return_dict_in_generate"] = True
+
+        generated_sequence = self.model.generate(input_ids=input_ids, attention_mask=attention_mask, **generate_kwargs)
+
+        logits = None
+
+        # TODO: check good default
+        if generate_kwargs.get("return_scores", True):
+            assert not generate_kwargs.get("do_sample", False), "return_logits=True is only supported for do_sample=False"
+
+            # Proceed to process logits and convert to score average.
+            # next_token_logits is [batch_size, vocab_size]
+            # raw_logits is a tuple of ([next_token_logits, past_key_values])
+
+            logits = generated_sequence.scores
+
+        out_b = generated_sequence.sequences.shape[0]
+        if self.framework == "pt":
+            generated_sequence = generated_sequence.sequences.reshape(in_b, out_b // in_b, *generated_sequence.sequences.shape[1:])
+        # elif self.framework == "tf":
+        #     generated_sequence = tf.reshape(generated_sequence, (in_b, out_b // in_b, *generated_sequence.shape[1:]))
+        return {"generated_sequence": generated_sequence, "input_ids": input_ids, "prompt_text": prompt_text, "logits": logits}
+
+    def postprocess(
+        self,
+        model_outputs,
+        return_type=ReturnType.FULL_TEXT,
+        clean_up_tokenization_spaces=True,
+        continue_final_message=None,
+    ):
+        generated_sequence = model_outputs["generated_sequence"][0]
+        input_ids = model_outputs["input_ids"]
+        prompt_text = model_outputs["prompt_text"]
+        logits = model_outputs["logits"]
+
+        # TODO: This is now making many assumptions about how the logits are ordered,
+        # Should think about how to make this explicit
+        scores, selective_logits = self._level_to_score_func(logits, self.tokenizer)
+
+        generated_sequence = generated_sequence.numpy().tolist()
+        records = []
+        for sequence in generated_sequence:
+            if return_type == ReturnType.TENSORS:
+                record = {"generated_token_ids": sequence}
+            elif return_type in {ReturnType.NEW_TEXT, ReturnType.FULL_TEXT}:
+                # Decode text
+                text = self.tokenizer.decode(
+                    sequence,
+                    skip_special_tokens=True,
+                    clean_up_tokenization_spaces=clean_up_tokenization_spaces,
+                )
+
+                # Remove PADDING prompt of the sequence if XLNet or Transfo-XL model is used
+                if input_ids is None:
+                    prompt_length = 0
+                else:
+                    prompt_length = len(
+                        self.tokenizer.decode(
+                            input_ids[0],
+                            skip_special_tokens=True,
+                            clean_up_tokenization_spaces=clean_up_tokenization_spaces,
+                        )
+                    )
+
+                all_text = text[prompt_length:]
+                if return_type == ReturnType.FULL_TEXT:
+                    if isinstance(prompt_text, str):
+                        all_text = prompt_text + all_text
+                    elif isinstance(prompt_text, Chat):
+                        if continue_final_message is None:
+                            # If the user passes a chat ending in an assistant message, we treat it as a prefill by
+                            # default because very few models support multiple separate, consecutive assistant messages
+                            continue_final_message = prompt_text.messages[-1]["role"] == "assistant"
+                        if continue_final_message:
+                            # With assistant prefill, concat onto the end of the last message
+                            all_text = list(prompt_text.messages)[:-1] + [
+                                {
+                                    "role": prompt_text.messages[-1]["role"],
+                                    "content": prompt_text.messages[-1]["content"] + all_text,
+                                }
+                            ]
+                        else:
+                            # When we're not starting from a prefill, the output is a new assistant message
+                            all_text = list(prompt_text.messages) + [{"role": "assistant", "content": all_text}]
+                record = {
+                    "generated_text": all_text,
+                    "score": scores[0],
+                    "selective_logits": selective_logits[0]
+                }
+            records.append(record)
+
+        return records
+
+
 from src.rank_dicts import SingleLabelRankDict
-from src.chat_templates import UNLITemplate
 
 
 model = transformers.AutoModelForCausalLM.from_pretrained(
@@ -119,8 +326,19 @@ template = UNLITemplate()
 premise = "Sam is sleeping."
 hypothesis = "Sam is awake."
 
-inputs = template.get_prompt_template(premise=premise, hypothesis=hypothesis) +\
-    template.get_completion_template(is_completion=True)
+inputs = [
+    {
+        "role": "user",
+        "content": "### Question: Given the premise \"{premise}\", how likely is it that the hypothesis \"{hypothesis}\" is true?\n\n".format(
+            premise=premise,
+            hypothesis=hypothesis
+        )
+    },
+    {
+        "role": "assistant",
+        "content": "### Answer:"
+    }
+]
 
 result = pipe(inputs)
 print(result)
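
The two hunks above cover new lines 50-279 and 326-344, so the README code that registers the custom pipeline and builds `pipe` (new lines 280-325) is not shown in this diff. As orientation only, here is a minimal sketch of how that wiring could look, assuming the `LevelToScorePipeline` class from the first hunk is in scope; the checkpoint name, the task name `"level-to-score"`, and the toy `mean_level_score` function are illustrative placeholders, not values taken from this commit.

```python
# Illustrative sketch only: checkpoint, task name, and scoring function are placeholders,
# not the values defined in the README between the hunks shown above.
# Assumes LevelToScorePipeline (defined in the first hunk) is already in scope.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from transformers.pipelines import PIPELINE_REGISTRY


def mean_level_score(scores, tokenizer):
    # Toy level_to_score_func: take the first generation step's logits over the
    # digit tokens "1" through "5" and collapse them to one expected level per example.
    level_token_ids = [tokenizer.convert_tokens_to_ids(str(i)) for i in range(1, 6)]
    first_step_logits = scores[0]                          # [batch_size, vocab_size]
    level_logits = first_step_logits[:, level_token_ids]   # [batch_size, 5]
    probs = torch.softmax(level_logits, dim=-1)
    levels = torch.arange(1, 6, dtype=probs.dtype)
    expected = (probs * levels).sum(dim=-1)
    return expected.tolist(), level_logits.tolist()


# Register the custom pipeline class under a placeholder task name.
PIPELINE_REGISTRY.register_pipeline(
    "level-to-score",
    pipeline_class=LevelToScorePipeline,
    pt_model=AutoModelForCausalLM,
)

model = AutoModelForCausalLM.from_pretrained("your-org/your-checkpoint")    # placeholder
tokenizer = AutoTokenizer.from_pretrained("your-org/your-checkpoint")       # placeholder

pipe = pipeline(
    "level-to-score",
    model=model,
    tokenizer=tokenizer,
    level_to_score_func=mean_level_score,
)
```

With wiring along these lines, the `pipe(inputs)` call in the second hunk returns records carrying `generated_text`, `score`, and `selective_logits`, as assembled in `postprocess`.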