| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| import re |
| import threading |
| import requests |
| from huggingface_hub import snapshot_download |
| import os |
| from abc import ABC |
| import numpy as np |
|
|
| from api.settings import LIGHTEN |
| from api.utils.file_utils import get_home_cache_dir |
| from rag.utils import num_tokens_from_string, truncate |
| import json |
|
|
| def sigmoid(x): |
| return 1 / (1 + np.exp(-x)) |
|
|
| class Base(ABC): |
| def __init__(self, key, model_name): |
| pass |
|
|
| def similarity(self, query: str, texts: list): |
| raise NotImplementedError("Please implement encode method!") |
|
|
|
|
| class DefaultRerank(Base): |
| _model = None |
| _model_lock = threading.Lock() |
|
|
| def __init__(self, key, model_name, **kwargs): |
| """ |
| If you have trouble downloading HuggingFace models, -_^ this might help!! |
| |
| For Linux: |
| export HF_ENDPOINT=https://hf-mirror.com |
| |
| For Windows: |
| Good luck |
| ^_- |
| |
| """ |
| if not LIGHTEN and not DefaultRerank._model: |
| import torch |
| from FlagEmbedding import FlagReranker |
| with DefaultRerank._model_lock: |
| if not DefaultRerank._model: |
| try: |
| DefaultRerank._model = FlagReranker(os.path.join(get_home_cache_dir(), re.sub(r"^[a-zA-Z]+/", "", model_name)), use_fp16=torch.cuda.is_available()) |
| except Exception as e: |
| model_dir = snapshot_download(repo_id= model_name, |
| local_dir=os.path.join(get_home_cache_dir(), re.sub(r"^[a-zA-Z]+/", "", model_name)), |
| local_dir_use_symlinks=False) |
| DefaultRerank._model = FlagReranker(model_dir, use_fp16=torch.cuda.is_available()) |
| self._model = DefaultRerank._model |
|
|
| def similarity(self, query: str, texts: list): |
| pairs = [(query,truncate(t, 2048)) for t in texts] |
| token_count = 0 |
| for _, t in pairs: |
| token_count += num_tokens_from_string(t) |
| batch_size = 4096 |
| res = [] |
| for i in range(0, len(pairs), batch_size): |
| scores = self._model.compute_score(pairs[i:i + batch_size], max_length=2048) |
| scores = sigmoid(np.array(scores)).tolist() |
| if isinstance(scores, float): res.append(scores) |
| else: res.extend(scores) |
| return np.array(res), token_count |
|
|
|
|
| class JinaRerank(Base): |
| def __init__(self, key, model_name="jina-reranker-v1-base-en", |
| base_url="https://api.jina.ai/v1/rerank"): |
| self.base_url = "https://api.jina.ai/v1/rerank" |
| self.headers = { |
| "Content-Type": "application/json", |
| "Authorization": f"Bearer {key}" |
| } |
| self.model_name = model_name |
|
|
| def similarity(self, query: str, texts: list): |
| texts = [truncate(t, 8196) for t in texts] |
| data = { |
| "model": self.model_name, |
| "query": query, |
| "documents": texts, |
| "top_n": len(texts) |
| } |
| res = requests.post(self.base_url, headers=self.headers, json=data).json() |
| return np.array([d["relevance_score"] for d in res["results"]]), res["usage"]["total_tokens"] |
|
|
|
|
| class YoudaoRerank(DefaultRerank): |
| _model = None |
| _model_lock = threading.Lock() |
|
|
| def __init__(self, key=None, model_name="maidalun1020/bce-reranker-base_v1", **kwargs): |
| if not LIGHTEN and not YoudaoRerank._model: |
| from BCEmbedding import RerankerModel |
| with YoudaoRerank._model_lock: |
| if not YoudaoRerank._model: |
| try: |
| print("LOADING BCE...") |
| YoudaoRerank._model = RerankerModel(model_name_or_path=os.path.join( |
| get_home_cache_dir(), |
| re.sub(r"^[a-zA-Z]+/", "", model_name))) |
| except Exception as e: |
| YoudaoRerank._model = RerankerModel( |
| model_name_or_path=model_name.replace( |
| "maidalun1020", "InfiniFlow")) |
|
|
| self._model = YoudaoRerank._model |
| |
| def similarity(self, query: str, texts: list): |
| pairs = [(query, truncate(t, self._model.max_length)) for t in texts] |
| token_count = 0 |
| for _, t in pairs: |
| token_count += num_tokens_from_string(t) |
| batch_size = 32 |
| res = [] |
| for i in range(0, len(pairs), batch_size): |
| scores = self._model.compute_score(pairs[i:i + batch_size], max_length=self._model.max_length) |
| scores = sigmoid(np.array(scores)).tolist() |
| if isinstance(scores, float): res.append(scores) |
| else: res.extend(scores) |
| return np.array(res), token_count |
|
|
|
|
| class XInferenceRerank(Base): |
| def __init__(self, key="xxxxxxx", model_name="", base_url=""): |
| if base_url.split("/")[-1] != "v1": |
| base_url = os.path.join(base_url, "v1") |
| self.model_name = model_name |
| self.base_url = base_url |
| self.headers = { |
| "Content-Type": "application/json", |
| "accept": "application/json" |
| } |
|
|
| def similarity(self, query: str, texts: list): |
| if len(texts) == 0: |
| return np.array([]), 0 |
| data = { |
| "model": self.model_name, |
| "query": query, |
| "return_documents": "true", |
| "return_len": "true", |
| "documents": texts |
| } |
| res = requests.post(self.base_url, headers=self.headers, json=data).json() |
| return np.array([d["relevance_score"] for d in res["results"]]), res["meta"]["tokens"]["input_tokens"]+res["meta"]["tokens"]["output_tokens"] |
|
|
|
|
| class LocalAIRerank(Base): |
| def __init__(self, key, model_name, base_url): |
| pass |
|
|
| def similarity(self, query: str, texts: list): |
| raise NotImplementedError("The LocalAIRerank has not been implement") |
|
|
|
|
| class NvidiaRerank(Base): |
| def __init__( |
| self, key, model_name, base_url="https://ai.api.nvidia.com/v1/retrieval/nvidia/" |
| ): |
| if not base_url: |
| base_url = "https://ai.api.nvidia.com/v1/retrieval/nvidia/" |
| self.model_name = model_name |
|
|
| if self.model_name == "nvidia/nv-rerankqa-mistral-4b-v3": |
| self.base_url = os.path.join( |
| base_url, "nv-rerankqa-mistral-4b-v3", "reranking" |
| ) |
|
|
| if self.model_name == "nvidia/rerank-qa-mistral-4b": |
| self.base_url = os.path.join(base_url, "reranking") |
| self.model_name = "nv-rerank-qa-mistral-4b:1" |
|
|
| self.headers = { |
| "accept": "application/json", |
| "Content-Type": "application/json", |
| "Authorization": f"Bearer {key}", |
| } |
|
|
| def similarity(self, query: str, texts: list): |
| token_count = num_tokens_from_string(query) + sum( |
| [num_tokens_from_string(t) for t in texts] |
| ) |
| data = { |
| "model": self.model_name, |
| "query": {"text": query}, |
| "passages": [{"text": text} for text in texts], |
| "truncate": "END", |
| "top_n": len(texts), |
| } |
| res = requests.post(self.base_url, headers=self.headers, json=data).json() |
| rank = np.array([d["logit"] for d in res["rankings"]]) |
| indexs = [d["index"] for d in res["rankings"]] |
| return rank[indexs], token_count |
|
|
|
|
| class LmStudioRerank(Base): |
| def __init__(self, key, model_name, base_url): |
| pass |
|
|
| def similarity(self, query: str, texts: list): |
| raise NotImplementedError("The LmStudioRerank has not been implement") |
|
|
|
|
| class OpenAI_APIRerank(Base): |
| def __init__(self, key, model_name, base_url): |
| pass |
|
|
| def similarity(self, query: str, texts: list): |
| raise NotImplementedError("The api has not been implement") |
|
|
|
|
| class CoHereRerank(Base): |
| def __init__(self, key, model_name, base_url=None): |
| from cohere import Client |
|
|
| self.client = Client(api_key=key) |
| self.model_name = model_name |
|
|
| def similarity(self, query: str, texts: list): |
| token_count = num_tokens_from_string(query) + sum( |
| [num_tokens_from_string(t) for t in texts] |
| ) |
| res = self.client.rerank( |
| model=self.model_name, |
| query=query, |
| documents=texts, |
| top_n=len(texts), |
| return_documents=False, |
| ) |
| rank = np.array([d.relevance_score for d in res.results]) |
| indexs = [d.index for d in res.results] |
| return rank[indexs], token_count |
|
|
|
|
| class TogetherAIRerank(Base): |
| def __init__(self, key, model_name, base_url): |
| pass |
|
|
| def similarity(self, query: str, texts: list): |
| raise NotImplementedError("The api has not been implement") |
|
|
|
|
| class SILICONFLOWRerank(Base): |
| def __init__( |
| self, key, model_name, base_url="https://api.siliconflow.cn/v1/rerank" |
| ): |
| if not base_url: |
| base_url = "https://api.siliconflow.cn/v1/rerank" |
| self.model_name = model_name |
| self.base_url = base_url |
| self.headers = { |
| "accept": "application/json", |
| "content-type": "application/json", |
| "authorization": f"Bearer {key}", |
| } |
|
|
| def similarity(self, query: str, texts: list): |
| payload = { |
| "model": self.model_name, |
| "query": query, |
| "documents": texts, |
| "top_n": len(texts), |
| "return_documents": False, |
| "max_chunks_per_doc": 1024, |
| "overlap_tokens": 80, |
| } |
| response = requests.post( |
| self.base_url, json=payload, headers=self.headers |
| ).json() |
| rank = np.array([d["relevance_score"] for d in response["results"]]) |
| indexs = [d["index"] for d in response["results"]] |
| return ( |
| rank[indexs], |
| response["meta"]["tokens"]["input_tokens"] + response["meta"]["tokens"]["output_tokens"], |
| ) |
|
|
|
|
| class BaiduYiyanRerank(Base): |
| def __init__(self, key, model_name, base_url=None): |
| from qianfan.resources import Reranker |
|
|
| key = json.loads(key) |
| ak = key.get("yiyan_ak", "") |
| sk = key.get("yiyan_sk", "") |
| self.client = Reranker(ak=ak, sk=sk) |
| self.model_name = model_name |
|
|
| def similarity(self, query: str, texts: list): |
| res = self.client.do( |
| model=self.model_name, |
| query=query, |
| documents=texts, |
| top_n=len(texts), |
| ).body |
| rank = np.array([d["relevance_score"] for d in res["results"]]) |
| indexs = [d["index"] for d in res["results"]] |
| return rank[indexs], res["usage"]["total_tokens"] |
|
|
|
|
| class VoyageRerank(Base): |
| def __init__(self, key, model_name, base_url=None): |
| import voyageai |
|
|
| self.client = voyageai.Client(api_key=key) |
| self.model_name = model_name |
|
|
| def similarity(self, query: str, texts: list): |
| res = self.client.rerank( |
| query=query, documents=texts, model=self.model_name, top_k=len(texts) |
| ) |
| rank = np.array([r.relevance_score for r in res.results]) |
| indexs = [r.index for r in res.results] |
| return rank[indexs], res.total_tokens |
|
|