Update README.md

#18
by aynot - opened
Files changed (1) hide show
  1. README.md +6 -7
README.md CHANGED
@@ -149,16 +149,16 @@ from vllm.inputs.data import TokensPrompt
149
  def format_instruction(instruction, query, doc):
150
  text = [
151
  {"role": "system", "content": "Judge whether the Document meets the requirements based on the Query and the Instruct provided. Note that the answer can only be \"yes\" or \"no\"."},
152
- {"role": "user", "content": f"<Instruct>: {instruction}\n\n<Query>: {query}\n\n<Document>: {doc}"}
153
  ]
154
  return text
155
 
156
- def process_inputs(pairs, instruction, max_length, suffix_tokens):
157
  messages = [format_instruction(instruction, query, doc) for query, doc in pairs]
158
  messages = tokenizer.apply_chat_template(
159
- messages, tokenize=True, add_generation_prompt=False, enable_thinking=False
160
  )
161
- messages = [ele[:max_length] + suffix_tokens for ele in messages]
162
  messages = [TokensPrompt(prompt_token_ids=ele) for ele in messages]
163
  return messages
164
 
@@ -187,9 +187,8 @@ tokenizer = AutoTokenizer.from_pretrained('Qwen/Qwen3-Reranker-0.6B')
187
  model = LLM(model='Qwen/Qwen3-Reranker-0.6B', tensor_parallel_size=number_of_gpu, max_model_len=10000, enable_prefix_caching=True, gpu_memory_utilization=0.8)
188
  tokenizer.padding_side = "left"
189
  tokenizer.pad_token = tokenizer.eos_token
190
- suffix = "<|im_end|>\n<|im_start|>assistant\n<think>\n\n</think>\n\n"
191
  max_length=8192
192
- suffix_tokens = tokenizer.encode(suffix, add_special_tokens=False)
193
  true_token = tokenizer("yes", add_special_tokens=False).input_ids[0]
194
  false_token = tokenizer("no", add_special_tokens=False).input_ids[0]
195
  sampling_params = SamplingParams(temperature=0,
@@ -209,7 +208,7 @@ documents = [
209
  ]
210
 
211
  pairs = list(zip(queries, documents))
212
- inputs = process_inputs(pairs, task, max_length-len(suffix_tokens), suffix_tokens)
213
  scores = compute_logits(model, inputs, sampling_params, true_token, false_token)
214
  print('scores', scores)
215
 
 
149
  def format_instruction(instruction, query, doc):
150
  text = [
151
  {"role": "system", "content": "Judge whether the Document meets the requirements based on the Query and the Instruct provided. Note that the answer can only be \"yes\" or \"no\"."},
152
+ {"role": "user", "content": f"<Instruct>: {instruction}\n<Query>: {query}\n<Document>: {doc}"}
153
  ]
154
  return text
155
 
156
+ def process_inputs(pairs, instruction, max_length):
157
  messages = [format_instruction(instruction, query, doc) for query, doc in pairs]
158
  messages = tokenizer.apply_chat_template(
159
+ messages, tokenize=True, add_generation_prompt=True, enable_thinking=False
160
  )
161
+ messages = [ele[:max_length] for ele in messages]
162
  messages = [TokensPrompt(prompt_token_ids=ele) for ele in messages]
163
  return messages
164
 
 
187
  model = LLM(model='Qwen/Qwen3-Reranker-0.6B', tensor_parallel_size=number_of_gpu, max_model_len=10000, enable_prefix_caching=True, gpu_memory_utilization=0.8)
188
  tokenizer.padding_side = "left"
189
  tokenizer.pad_token = tokenizer.eos_token
190
+
191
  max_length=8192
 
192
  true_token = tokenizer("yes", add_special_tokens=False).input_ids[0]
193
  false_token = tokenizer("no", add_special_tokens=False).input_ids[0]
194
  sampling_params = SamplingParams(temperature=0,
 
208
  ]
209
 
210
  pairs = list(zip(queries, documents))
211
+ inputs = process_inputs(pairs, task, max_length)
212
  scores = compute_logits(model, inputs, sampling_params, true_token, false_token)
213
  print('scores', scores)
214