Add library_name to metadata
#1
by nielsr HF Staff - opened
README.md
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
---
|
| 2 |
-
|
|
|
|
| 3 |
datasets:
|
| 4 |
- hotpotqa/hotpot_qa
|
| 5 |
- dgslibisey/MuSiQue
|
|
@@ -8,18 +9,19 @@ datasets:
|
|
| 8 |
language:
|
| 9 |
- en
|
| 10 |
- zh
|
|
|
|
| 11 |
metrics:
|
| 12 |
- accuracy
|
| 13 |
- exact_match
|
| 14 |
- f1
|
| 15 |
- recall
|
| 16 |
-
base_model:
|
| 17 |
-
- Qwen/Qwen3-4B-Instruct-2507
|
| 18 |
pipeline_tag: text-ranking
|
|
|
|
| 19 |
tags:
|
| 20 |
- Rerank
|
| 21 |
- Memory
|
| 22 |
---
|
|
|
|
| 23 |
# QRRanker: Query-focused and Memory-aware Reranker for Long Context Processing
|
| 24 |
|
| 25 |
<p align="center">
|
|
@@ -28,8 +30,7 @@ tags:
|
|
| 28 |
<a href="https://huggingface.co/MindscapeRAG/QRRanker"><b>🤗 Models</b></a>
|
| 29 |
</p>
|
| 30 |
|
| 31 |
-
QRRanker is a lightweight reranking framework that leverages **Query-focused Retrieval (QR) heads** to produce continuous relevance scores, enabling effective listwise reranking with small-scale models.
|
| 32 |
-
|
| 33 |
|
| 34 |
## Model Description
|
| 35 |
|
|
@@ -290,7 +291,7 @@ def compute_qr_scores(
|
|
| 290 |
|
| 291 |
# Select specific QR heads
|
| 292 |
if qr_head_list is not None:
|
| 293 |
-
head_set = [tuple(map(int, h.split('-'))) for h in qr_head_list.split(',')]
|
| 294 |
indices = torch.tensor(head_set).to(all_head_scores.device)
|
| 295 |
layers, heads = indices[:, 0], indices[:, 1]
|
| 296 |
all_head_scores = all_head_scores[:, layers, heads, :]
|
|
@@ -323,8 +324,11 @@ def rerank_documents(model, tokenizer, question, paragraphs, qr_head_list, devic
|
|
| 323 |
scores: Corresponding relevance scores
|
| 324 |
"""
|
| 325 |
# Build input sequence
|
| 326 |
-
prompt_prefix = '<|im_start|>user
|
| 327 |
-
|
|
|
|
|
|
|
|
|
|
| 328 |
|
| 329 |
chunk_part = prompt_prefix + retrieval_instruction
|
| 330 |
chunk_ranges = []
|
|
@@ -336,9 +340,13 @@ def rerank_documents(model, tokenizer, question, paragraphs, qr_head_list, devic
|
|
| 336 |
chunk_part += ' ' + text.strip()
|
| 337 |
end = len(chunk_part)
|
| 338 |
chunk_ranges.append([start, end])
|
| 339 |
-
chunk_part += '
|
|
|
|
|
|
|
| 340 |
|
| 341 |
-
query_part = f"Use the retrieved chunks to answer the user's query.
|
|
|
|
|
|
|
| 342 |
full_seq = chunk_part + query_part
|
| 343 |
|
| 344 |
# Tokenize with offset mapping
|
|
@@ -449,6 +457,7 @@ python qr_ranker_inference.py \
|
|
| 449 |
| `--use_summary` | flag | False | Use summary field in data |
|
| 450 |
|
| 451 |
|
|
|
|
| 452 |
|
| 453 |
If you use QRRanker in your work, please cite:
|
| 454 |
|
|
|
|
| 1 |
---
|
| 2 |
+
base_model:
|
| 3 |
+
- Qwen/Qwen3-4B-Instruct-2507
|
| 4 |
datasets:
|
| 5 |
- hotpotqa/hotpot_qa
|
| 6 |
- dgslibisey/MuSiQue
|
|
|
|
| 9 |
language:
|
| 10 |
- en
|
| 11 |
- zh
|
| 12 |
+
license: apache-2.0
|
| 13 |
metrics:
|
| 14 |
- accuracy
|
| 15 |
- exact_match
|
| 16 |
- f1
|
| 17 |
- recall
|
|
|
|
|
|
|
| 18 |
pipeline_tag: text-ranking
|
| 19 |
+
library_name: transformers
|
| 20 |
tags:
|
| 21 |
- Rerank
|
| 22 |
- Memory
|
| 23 |
---
|
| 24 |
+
|
| 25 |
# QRRanker: Query-focused and Memory-aware Reranker for Long Context Processing
|
| 26 |
|
| 27 |
<p align="center">
|
|
|
|
| 30 |
<a href="https://huggingface.co/MindscapeRAG/QRRanker"><b>🤗 Models</b></a>
|
| 31 |
</p>
|
| 32 |
|
| 33 |
+
QRRanker is a lightweight reranking framework that leverages **Query-focused Retrieval (QR) heads** to produce continuous relevance scores, enabling effective listwise reranking with small-scale models. It was introduced in the paper [Query-focused and Memory-aware Reranker for Long Context Processing](https://huggingface.co/papers/2602.12192).
|
|
|
|
| 34 |
|
| 35 |
## Model Description
|
| 36 |
|
|
|
|
| 291 |
|
| 292 |
# Select specific QR heads
|
| 293 |
if qr_head_list is not None:
|
| 294 |
+
head_set = [tuple(map(int, h.split('-'))) for h in qr_head_list.split(',') ]
|
| 295 |
indices = torch.tensor(head_set).to(all_head_scores.device)
|
| 296 |
layers, heads = indices[:, 0], indices[:, 1]
|
| 297 |
all_head_scores = all_head_scores[:, layers, heads, :]
|
|
|
|
| 324 |
scores: Corresponding relevance scores
|
| 325 |
"""
|
| 326 |
# Build input sequence
|
| 327 |
+
prompt_prefix = '<|im_start|>user
|
| 328 |
+
'
|
| 329 |
+
retrieval_instruction = "Here are some retrieved chunks:
|
| 330 |
+
|
| 331 |
+
"
|
| 332 |
|
| 333 |
chunk_part = prompt_prefix + retrieval_instruction
|
| 334 |
chunk_ranges = []
|
|
|
|
| 340 |
chunk_part += ' ' + text.strip()
|
| 341 |
end = len(chunk_part)
|
| 342 |
chunk_ranges.append([start, end])
|
| 343 |
+
chunk_part += '
|
| 344 |
+
|
| 345 |
+
'
|
| 346 |
|
| 347 |
+
query_part = f"Use the retrieved chunks to answer the user's query.
|
| 348 |
+
|
| 349 |
+
Query: {question}"
|
| 350 |
full_seq = chunk_part + query_part
|
| 351 |
|
| 352 |
# Tokenize with offset mapping
|
|
|
|
| 457 |
| `--use_summary` | flag | False | Use summary field in data |
|
| 458 |
|
| 459 |
|
| 460 |
+
## Citation
|
| 461 |
|
| 462 |
If you use QRRanker in your work, please cite:
|
| 463 |
|