Add pipeline tag

#3
by nielsr HF Staff - opened
Files changed (1) hide show
  1. README.md +19 -6
README.md CHANGED
@@ -1,9 +1,11 @@
1
  ---
2
- license: apache-2.0
3
  base_model:
4
  - Qwen/Qwen3-8B-Base
5
  library_name: transformers
 
 
6
  ---
 
7
  # Qwen3-Reranker-8B
8
 
9
  <p align="center">
@@ -46,7 +48,7 @@ For more details, including benchmark evaluation, hardware requirements, and inf
46
  > **Note**:
47
  > - `MRL Support` indicates whether the embedding model supports custom dimensions for the final embedding.
48
  > - `Instruction Aware` notes whether the embedding or reranking model supports customizing the input instruction according to different tasks.
49
- > - Our evaluation indicates that, for most downstream tasks, using instructions (instruct) typically yields an improvement of 1% to 5% compared to not using them. Therefore, we recommend that developers create tailored instructions specific to their tasks and scenarios. In multilingual contexts, we also advise users to write their instructions in English, as most instructions utilized during the model training process were originally written in English.
50
 
51
 
52
  ## Usage
@@ -66,7 +68,9 @@ from transformers import AutoModel, AutoTokenizer, AutoModelForCausalLM
66
  def format_instruction(instruction, query, doc):
67
  if instruction is None:
68
  instruction = 'Given a web search query, retrieve relevant passages that answer the query'
69
- output = "<Instruct>: {instruction}\n<Query>: {query}\n<Document>: {doc}".format(instruction=instruction,query=query, doc=doc)
 
 
70
  return output
71
 
72
  def process_inputs(pairs):
@@ -101,8 +105,17 @@ token_false_id = tokenizer.convert_tokens_to_ids("no")
101
  token_true_id = tokenizer.convert_tokens_to_ids("yes")
102
  max_length = 8192
103
 
104
- prefix = "<|im_start|>system\nJudge whether the Document meets the requirements based on the Query and the Instruct provided. Note that the answer can only be \"yes\" or \"no\".<|im_end|>\n<|im_start|>user\n"
105
- suffix = "<|im_end|>\n<|im_start|>assistant\n<think>\n\n</think>\n\n"
 
 
 
 
 
 
 
 
 
106
  prefix_tokens = tokenizer.encode(prefix, add_special_tokens=False)
107
  suffix_tokens = tokenizer.encode(suffix, add_special_tokens=False)
108
 
@@ -150,7 +163,7 @@ If you find our work helpful, feel free to give us a cite.
150
  ```
151
  @misc{qwen3-embedding,
152
  title = {Qwen3-Embedding},
153
- url = {https://qwenlm.github.io/blog/qwen3/},
154
  author = {Qwen Team},
155
  month = {May},
156
  year = {2025}
 
1
  ---
 
2
  base_model:
3
  - Qwen/Qwen3-8B-Base
4
  library_name: transformers
5
+ license: apache-2.0
6
+ pipeline_tag: text-ranking
7
  ---
8
+
9
  # Qwen3-Reranker-8B
10
 
11
  <p align="center">
 
48
  > **Note**:
49
  > - `MRL Support` indicates whether the embedding model supports custom dimensions for the final embedding.
50
  > - `Instruction Aware` notes whether the embedding or reranking model supports customizing the input instruction according to different tasks.
51
+ > - Our evaluation indicates that, for most downstream tasks, using instructions (instruct) typically yields an improvement of 1% to 5% compared to not using them. Therefore, we recommend that developers customize the `instruct` according to their specific scenarios, tasks, and languages. In multilingual contexts, we also advise users to write their instructions in English, as most instructions utilized during the model training process were originally written in English.
52
 
53
 
54
  ## Usage
 
68
  def format_instruction(instruction, query, doc):
69
  if instruction is None:
70
  instruction = 'Given a web search query, retrieve relevant passages that answer the query'
71
+ output = "<Instruct>: {instruction}\n<Query>: {query}\n<Document>: {doc}".format(instruction=instruction,query=query, doc=doc)
74
  return output
75
 
76
  def process_inputs(pairs):
 
105
  token_true_id = tokenizer.convert_tokens_to_ids("yes")
106
  max_length = 8192
107
 
108
+ prefix = "<|im_start|>system\nJudge whether the Document meets the requirements based on the Query and the Instruct provided. Note that the answer can only be \"yes\" or \"no\".<|im_end|>\n<|im_start|>user\n"
109
+ suffix = "<|im_end|>\n<|im_start|>assistant\n<think>\n\n</think>\n\n"
119
  prefix_tokens = tokenizer.encode(prefix, add_special_tokens=False)
120
  suffix_tokens = tokenizer.encode(suffix, add_special_tokens=False)
121
 
 
163
  ```
164
  @misc{qwen3-embedding,
165
  title = {Qwen3-Embedding},
166
+ url = {https://qwenlm.github.io/blog/qwen3-embedding/},
167
  author = {Qwen Team},
168
  month = {May},
169
  year = {2025}