qwenillustrious
/
sentence-transformers
/examples
/cross_encoder
/applications
/cross-encoder_usage.py
| """ | |
| This example computes the score between a query and all possible | |
| sentences in a corpus using a Cross-Encoder for semantic textual similarity (STS). | |
| It then outputs the most similar sentences for the given query. | |
| """ | |
| import numpy as np | |
| from sentence_transformers.cross_encoder import CrossEncoder | |
# Load the pre-trained Cross-Encoder used for semantic textual similarity.
model = CrossEncoder("cross-encoder/stsb-distilroberta-base")

# The sentence every corpus entry is compared against.
query = "A man is eating pasta."

# Candidate sentences to score against the query.
corpus = [
    "A man is eating food.",
    "A man is eating a piece of bread.",
    "The girl is carrying a baby.",
    "A man is riding a horse.",
    "A woman is playing violin.",
    "Two men pushed carts through the woods.",
    "A man is riding a white horse on an enclosed ground.",
    "A monkey is playing drums.",
    "A cheetah is running behind its prey.",
]

# 1. Let the model rank the whole corpus for the query in a single call.
ranks = model.rank(query, corpus)

# Print one "<score>\t<sentence>" line per entry, in the order model.rank
# returned them; each entry carries a "score" and a "corpus_id" that indexes
# back into `corpus`.
print("Query:", query)
for hit in ranks:
    hit_score = hit["score"]
    hit_sentence = corpus[hit["corpus_id"]]
    print(f"{hit_score:.2f}\t{hit_sentence}")

# 2. Alternative: build the (query, sentence) pairs by hand and score them
#    directly with predict().
sentence_combinations = [[query, candidate] for candidate in corpus]
scores = model.predict(sentence_combinations)

# argsort yields ascending order; reversing it gives the corpus indices from
# highest to lowest score.
ascending_indices = np.argsort(scores)
ranked_indices = ascending_indices[::-1]
print("scores:", scores)
print("indices:", ranked_indices)