"""Score and rank candidate passages against a query with a CrossEncoder.

The script loads the ``OpenBMB/MiniCPM-Reranker-Light`` checkpoint in
float16, prefixes the query with the ``"Query: "`` instruction, then:

* scores every (query, passage) pair with ``CrossEncoder.predict``;
* ranks the passages with ``CrossEncoder.rank``.

Both results are printed to stdout.
"""
from sentence_transformers import CrossEncoder
from transformers import LlamaTokenizer
import torch

model_name = "OpenBMB/MiniCPM-Reranker-Light"

# NOTE(review): newer sentence-transformers releases appear to prefer
# `model_kwargs` over `automodel_args` -- confirm against the installed version.
model = CrossEncoder(
    model_name,
    max_length=1024,
    trust_remote_code=True,
    automodel_args={"torch_dtype": torch.float16},
)
# Alternative: enable flash_attention_2 by loading the model like this instead.
# model = CrossEncoder(
#     model_name,
#     max_length=1024,
#     trust_remote_code=True,
#     automodel_args={
#         "attn_implementation": "flash_attention_2",
#         "torch_dtype": torch.float16,
#     },
# )

# Pad on the right-hand side when batching inputs.
model.tokenizer.padding_side = "right"

query = "中国的首都是哪里?"  # "Where is the capital of China?"
passages = ["beijing", "shanghai"]  # i.e. Beijing and Shanghai

# The instruction prefix is prepended to the query before scoring.
INSTRUCTION = "Query: "
query = INSTRUCTION + query

# One [query, passage] pair per candidate passage.
sentence_pairs = [[query, doc] for doc in passages]

# Raw relevance scores, in the same order as `passages`.
scores = model.predict(sentence_pairs, convert_to_tensor=True).tolist()

# Ranked results; each entry exposes at least the 'score' and 'text' keys
# used below (the text is included because return_documents=True).
rankings = model.rank(
    query,
    passages,
    return_documents=True,
    convert_to_tensor=True,
)

print(scores)  # [0.017913818359375, 0.0002453327178955078]
for ranking in rankings:
    print(f"Score: {ranking['score']:.4f}, Corpus: {ranking['text']}")
# Score: 0.0179, Corpus: beijing
# Score: 0.0002, Corpus: shanghai