Spaces:
Sleeping
Sleeping
adds reranker
Browse files- doc_searcher.py +11 -14
- reranker.py +49 -0
doc_searcher.py
CHANGED
|
@@ -2,6 +2,7 @@ from qdrant_client import QdrantClient
|
|
| 2 |
from qdrant_client.models import Filter, FieldCondition, MatchValue, MatchText
|
| 3 |
from fastembed import SparseTextEmbedding, LateInteractionTextEmbedding
|
| 4 |
from qdrant_client import QdrantClient, models
|
|
|
|
| 5 |
from sentence_transformers import SentenceTransformer
|
| 6 |
from config import DENSE_MODEL, SPARSE_MODEL, LATE_INTERACTION_MODEL, QDRANT_URL, QDRANT_API_KEY,HUGGING_FACE_API_KEY
|
| 7 |
|
|
@@ -9,8 +10,9 @@ class DocSearcher:
|
|
| 9 |
|
| 10 |
def __init__(self, collection_name):
|
| 11 |
self.collection_name = collection_name
|
|
|
|
| 12 |
self.dense_model = SentenceTransformer(DENSE_MODEL,device="cpu",token=HUGGING_FACE_API_KEY)
|
| 13 |
-
self.model = SentenceTransformer("Qwen/Qwen3-Embedding-
|
| 14 |
self.sparse_model = SparseTextEmbedding(SPARSE_MODEL)
|
| 15 |
self.late_interaction_model = LateInteractionTextEmbedding(LATE_INTERACTION_MODEL)
|
| 16 |
self.qdrant_client = QdrantClient(QDRANT_URL,api_key=QDRANT_API_KEY,timeout=30)
|
|
@@ -101,35 +103,30 @@ class DocSearcher:
|
|
| 101 |
|
| 102 |
async def search_temp(self, text: str):
|
| 103 |
|
|
|
|
| 104 |
dense_query = self.model.encode(text).tolist()
|
| 105 |
# sparse_query = next(self.sparse_model.query_embed(text))
|
| 106 |
|
| 107 |
prefetch = [
|
| 108 |
models.Prefetch(
|
| 109 |
query=dense_query,
|
| 110 |
-
using="Qwen/Qwen3-Embedding-
|
| 111 |
-
limit=
|
| 112 |
),
|
| 113 |
-
# models.Prefetch(
|
| 114 |
-
# query=models.SparseVector(**sparse_query.as_object()),
|
| 115 |
-
# using=SPARSE_MODEL,
|
| 116 |
-
# limit=100
|
| 117 |
-
# )
|
| 118 |
]
|
| 119 |
|
| 120 |
search_result = self.qdrant_client.query_points(
|
| 121 |
collection_name= "sl-list",
|
| 122 |
prefetch=prefetch,
|
| 123 |
-
query=models.FusionQuery(
|
| 124 |
-
fusion=models.Fusion.RRF,
|
| 125 |
-
),
|
| 126 |
with_payload=True,
|
| 127 |
-
limit =
|
| 128 |
).points
|
| 129 |
|
| 130 |
data = []
|
| 131 |
|
| 132 |
for hit in search_result:
|
| 133 |
-
data.append(hit.payload)
|
|
|
|
|
|
|
| 134 |
|
| 135 |
-
return
|
|
|
|
| 2 |
from qdrant_client.models import Filter, FieldCondition, MatchValue, MatchText
|
| 3 |
from fastembed import SparseTextEmbedding, LateInteractionTextEmbedding
|
| 4 |
from qdrant_client import QdrantClient, models
|
| 5 |
+
from reranker import Reranker
|
| 6 |
from sentence_transformers import SentenceTransformer
|
| 7 |
from config import DENSE_MODEL, SPARSE_MODEL, LATE_INTERACTION_MODEL, QDRANT_URL, QDRANT_API_KEY,HUGGING_FACE_API_KEY
|
| 8 |
|
|
|
|
| 10 |
|
| 11 |
def __init__(self, collection_name):
    """Create the reranker, the embedding models and the Qdrant client.

    Args:
        collection_name: Stored unchanged on the instance as
            ``self.collection_name``.
    """
    self.collection_name = collection_name
    self.reranker = Reranker()

    # SentenceTransformer encoders, both pinned to CPU.
    self.dense_model = SentenceTransformer(
        DENSE_MODEL,
        device="cpu",
        token=HUGGING_FACE_API_KEY,
    )
    self.model = SentenceTransformer("Qwen/Qwen3-Embedding-0.6B", device="cpu")

    # fastembed encoders for sparse and late-interaction vectors.
    self.sparse_model = SparseTextEmbedding(SPARSE_MODEL)
    self.late_interaction_model = LateInteractionTextEmbedding(LATE_INTERACTION_MODEL)

    # Remote Qdrant instance; requests time out after 30 seconds.
    self.qdrant_client = QdrantClient(QDRANT_URL, api_key=QDRANT_API_KEY, timeout=30)
|
|
|
|
| 103 |
|
| 104 |
async def search_temp(self, text: str):
    """Retrieve candidate passages for *text* and score them with the reranker.

    The query is embedded with ``self.model``, up to 100 points are fetched
    from the ``"sl-list"`` collection through a dense prefetch, and the
    ``"tekst"`` payload field of every hit is passed to
    ``self.reranker.compute_logits`` together with the query.

    Args:
        text: The user query.

    Returns:
        A list with one reranker score per retrieved hit, in the order the
        hits were returned by Qdrant. Empty when nothing was retrieved.

    Raises:
        KeyError: If a hit's payload has no ``"tekst"`` field.
    """
    # NOTE(review): every call below is synchronous, so this coroutine blocks
    # the event loop while it runs - confirm that is acceptable for callers.
    dense_query = self.model.encode(text).tolist()

    # NOTE: only the dense prefetch is active; the sparse branch is disabled.
    prefetch = [
        models.Prefetch(
            query=dense_query,
            using="Qwen/Qwen3-Embedding-0.6B",
            limit=100,
        ),
    ]

    # NOTE(review): no top-level `query` is passed alongside the prefetch -
    # confirm the resulting ordering is what is wanted before reranking.
    search_result = self.qdrant_client.query_points(
        collection_name="sl-list",
        prefetch=prefetch,
        with_payload=True,
        limit=100,
    ).points

    data = [hit.payload["tekst"] for hit in search_result]
    if not data:
        # Nothing to rerank; avoid handing an empty batch to the model.
        return []

    # The reranker pairs queries and documents positionally, so the query
    # must be repeated once per document; a single-element list would cause
    # only the first hit to be scored.
    queries = [text] * len(data)

    return self.reranker.compute_logits(queries, data)
|
reranker.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
from transformers import AutoModel, AutoTokenizer, AutoModelForCausalLM
|
| 3 |
+
|
| 4 |
+
class Reranker:
    """Relevance scorer built on the ``Qwen/Qwen3-Reranker-4B`` causal LM.

    Each (query, document) pair is rendered into a chat-style prompt. The
    score is the probability of the ``"yes"`` token relative to the ``"no"``
    token at the final position of the model output.
    """

    MODEL_NAME = "Qwen/Qwen3-Reranker-4B"
    DEFAULT_INSTRUCTION = 'Given a web search query, retrieve relevant passages that answer the query'
    # Total token budget per prompt, including the fixed prefix and suffix.
    MAX_LENGTH = 2048

    _PROMPT_PREFIX = (
        "<|im_start|>system\n"
        "Judge whether the Document meets the requirements based on the Query "
        "and the Instruct provided. Note that the answer can only be "
        "\"yes\" or \"no\".<|im_end|>\n"
        "<|im_start|>user\n"
    )
    _PROMPT_SUFFIX = "<|im_end|>\n<|im_start|>assistant\n<think>\n\n</think>\n\n"

    def __init__(self):
        """Load the tokenizer (left padding) and the model in eval mode."""
        self.tokenizer = AutoTokenizer.from_pretrained(self.MODEL_NAME, padding_side='left')
        self.model = AutoModelForCausalLM.from_pretrained(self.MODEL_NAME).eval()

    @staticmethod
    def format_instruction(instruction, query, doc):
        """Render one (instruction, query, document) triple as prompt text.

        Declared as a static method so that both ``self.format_instruction(...)``
        and ``Reranker.format_instruction(...)`` work; without it the instance
        call passes ``self`` as an extra positional argument and fails.

        Args:
            instruction: Task description; ``None`` selects the default
                web-search instruction.
            query: The search query.
            doc: The candidate document text.

        Returns:
            The formatted ``<Instruct>/<Query>/<Document>`` string.
        """
        if instruction is None:
            instruction = 'Given a web search query, retrieve relevant passages that answer the query'
        return f"<Instruct>: {instruction}\n<Query>: {query}\n<Document>: {doc}"

    def process_inputs(self, pairs):
        """Tokenize formatted pairs and wrap them in the chat prefix and suffix.

        Args:
            pairs: List of strings produced by ``format_instruction``.

        Returns:
            The padded tokenizer output as tensors moved to the model device.
        """
        prefix_tokens = self.tokenizer.encode(self._PROMPT_PREFIX, add_special_tokens=False)
        suffix_tokens = self.tokenizer.encode(self._PROMPT_SUFFIX, add_special_tokens=False)
        max_length = self.MAX_LENGTH

        # Reserve room for the fixed prefix/suffix so the final sequence
        # never exceeds max_length after they are attached.
        inputs = self.tokenizer(
            pairs,
            padding=False,
            truncation='longest_first',
            return_attention_mask=False,
            max_length=max_length - len(prefix_tokens) - len(suffix_tokens),
        )
        inputs['input_ids'] = [
            prefix_tokens + ids + suffix_tokens for ids in inputs['input_ids']
        ]

        inputs = self.tokenizer.pad(inputs, padding=True, return_tensors="pt", max_length=max_length)
        for key in inputs:
            inputs[key] = inputs[key].to(self.model.device)
        return inputs

    @torch.no_grad()
    def compute_logits(self, queries, documents):
        """Score every document against its query.

        Args:
            queries: Either one query (a string or a one-element list), which
                is applied to every document, or a list with exactly one
                query per document.
            documents: List of document texts.

        Returns:
            A list of floats, one per document: the softmax probability of
            ``"yes"`` over the two tokens ``"yes"`` and ``"no"``. Empty when
            ``documents`` is empty.

        Raises:
            ValueError: If more than one query is given and the number of
                queries differs from the number of documents.
        """
        if isinstance(queries, str):
            queries = [queries]
        queries = list(queries)
        documents = list(documents)

        if not documents:
            return []
        if len(queries) == 1:
            # Broadcast a single query; zip() would otherwise stop after
            # the first document.
            queries = queries * len(documents)
        elif len(queries) != len(documents):
            raise ValueError(
                f"got {len(queries)} queries for {len(documents)} documents; "
                "pass one query or one query per document"
            )

        token_false_id = self.tokenizer.convert_tokens_to_ids("no")
        token_true_id = self.tokenizer.convert_tokens_to_ids("yes")

        task = self.DEFAULT_INSTRUCTION
        pairs = [
            self.format_instruction(task, query, doc)
            for query, doc in zip(queries, documents)
        ]
        inputs = self.process_inputs(pairs)

        # Left padding puts the last real token of every sequence at index -1.
        last_logits = self.model(**inputs).logits[:, -1, :]
        yes_no = torch.stack(
            [last_logits[:, token_false_id], last_logits[:, token_true_id]],
            dim=1,
        )
        log_probs = torch.nn.functional.log_softmax(yes_no, dim=1)
        return log_probs[:, 1].exp().tolist()
|