Update README.md (#18), opened by aynot

README.md — CHANGED
```diff
@@ -149,16 +149,16 @@ from vllm.inputs.data import TokensPrompt
 def format_instruction(instruction, query, doc):
     text = [
         {"role": "system", "content": "Judge whether the Document meets the requirements based on the Query and the Instruct provided. Note that the answer can only be \"yes\" or \"no\"."},
-        {"role": "user", "content": f"<Instruct>: {instruction}\n
+        {"role": "user", "content": f"<Instruct>: {instruction}\n<Query>: {query}\n<Document>: {doc}"}
     ]
     return text
 
-def process_inputs(pairs, instruction, max_length
+def process_inputs(pairs, instruction, max_length):
     messages = [format_instruction(instruction, query, doc) for query, doc in pairs]
     messages = tokenizer.apply_chat_template(
-        messages, tokenize=True, add_generation_prompt=
+        messages, tokenize=True, add_generation_prompt=True, enable_thinking=False
     )
-    messages = [ele[:max_length]
+    messages = [ele[:max_length] for ele in messages]
     messages = [TokensPrompt(prompt_token_ids=ele) for ele in messages]
     return messages
```
|
```diff
@@ -187,9 +187,8 @@ tokenizer = AutoTokenizer.from_pretrained('Qwen/Qwen3-Reranker-0.6B')
 model = LLM(model='Qwen/Qwen3-Reranker-0.6B', tensor_parallel_size=number_of_gpu, max_model_len=10000, enable_prefix_caching=True, gpu_memory_utilization=0.8)
 tokenizer.padding_side = "left"
 tokenizer.pad_token = tokenizer.eos_token
 
 max_length=8192
-suffix_tokens = tokenizer.encode(suffix, add_special_tokens=False)
 true_token = tokenizer("yes", add_special_tokens=False).input_ids[0]
 false_token = tokenizer("no", add_special_tokens=False).input_ids[0]
 sampling_params = SamplingParams(temperature=0,
```
|
```diff
@@ -209,7 +208,7 @@ documents = [
 ]
 
 pairs = list(zip(queries, documents))
-inputs = process_inputs(pairs, task, max_length
+inputs = process_inputs(pairs, task, max_length)
 scores = compute_logits(model, inputs, sampling_params, true_token, false_token)
 print('scores', scores)
```
|
|
|
README.md after the change (lines 149–164):

```python
def format_instruction(instruction, query, doc):
    text = [
        {"role": "system", "content": "Judge whether the Document meets the requirements based on the Query and the Instruct provided. Note that the answer can only be \"yes\" or \"no\"."},
        {"role": "user", "content": f"<Instruct>: {instruction}\n<Query>: {query}\n<Document>: {doc}"}
    ]
    return text

def process_inputs(pairs, instruction, max_length):
    messages = [format_instruction(instruction, query, doc) for query, doc in pairs]
    messages = tokenizer.apply_chat_template(
        messages, tokenize=True, add_generation_prompt=True, enable_thinking=False
    )
    messages = [ele[:max_length] for ele in messages]
    messages = [TokensPrompt(prompt_token_ids=ele) for ele in messages]
    return messages
```
|
|
README.md after the change (lines 187–194):

```python
model = LLM(model='Qwen/Qwen3-Reranker-0.6B', tensor_parallel_size=number_of_gpu, max_model_len=10000, enable_prefix_caching=True, gpu_memory_utilization=0.8)
tokenizer.padding_side = "left"
tokenizer.pad_token = tokenizer.eos_token

max_length=8192
true_token = tokenizer("yes", add_special_tokens=False).input_ids[0]
false_token = tokenizer("no", add_special_tokens=False).input_ids[0]
sampling_params = SamplingParams(temperature=0,
```
|
|
|
|
README.md after the change (lines 208–214):

```python
]

pairs = list(zip(queries, documents))
inputs = process_inputs(pairs, task, max_length)
scores = compute_logits(model, inputs, sampling_params, true_token, false_token)
print('scores', scores)
```