| import numpy as np | |
# Prompt template for LLM-as-a-judge relevance scoring: the model rates a
# (query, document) pair on an integer 0-10 scale, putting its reasoning in
# <think>...</think> and the final integer in <answer>...</answer>.
# Format placeholders: {instruction} (relevance definition), {query}, {doc}.
prompt_template = """Given a query and a document, please give a relevance score of 0~10.
The goal or relevance definition is: {instruction}
Here is the query:
{query}
Here is the document:
{doc}
After thinking, directly choose a relevance score from [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10].
- 0 represents completely not related
- 10 means perfectly related.
Desired output format:
<think>put your thinking here</think><answer>Only allows an integer here</answer>
Your output:"""
def truncate(tokenizer, text, length):
    """Truncate ``text`` to at most ``length`` tokens.

    Args:
        tokenizer: object exposing ``tokenize`` and
            ``convert_tokens_to_string`` (e.g. a HuggingFace tokenizer —
            assumed interface; confirm against callers).
        text: string to truncate, or None.
        length: maximum number of tokens to keep, or None for no truncation.

    Returns:
        The detokenized string containing the first ``length`` tokens, or
        ``text`` unchanged when either ``text`` or ``length`` is None.
    """
    # `is None` (identity) is the correct sentinel check, not `== None`.
    if length is None or text is None:
        return text
    # Tokenize, keep the first `length` tokens, then detokenize back to text.
    return tokenizer.convert_tokens_to_string(tokenizer.tokenize(text)[:length])
def hybrid_scores(results, alpha):
    """Fuse first-stage retrieval scores with reranker scores via z-score mixing.

    Both score lists are standardized (zero mean, unit variance) and combined:
        hybrid = alpha * z(first_stage_score) + (1 - alpha) * z(rank_score)

    Args:
        results: list of dicts, each with numeric keys "first_stage_score"
            and "rank_score" (other keys are carried through unchanged).
        alpha: weight in [0, 1] given to the normalized first-stage score.

    Returns:
        A new list of shallow-copied dicts with an added "hybrid_score"
        (float) key, sorted by hybrid score in descending order. The input
        dicts are not mutated.
    """
    # Empty input: nothing to normalize (np.mean([]) would warn and yield nan).
    if not results:
        return []
    first_stage_scores = [each["first_stage_score"] for each in results]
    rank_scores = [each["rank_score"] for each in results]
    first_stage_mean, first_stage_std = np.mean(first_stage_scores), np.std(first_stage_scores)
    rank_mean, rank_std = np.mean(rank_scores), np.std(rank_scores)
    # Guard against zero variance (all scores identical, or a single result):
    # dividing by 0 would produce inf/nan hybrid scores. With std forced to 1
    # the normalized score becomes (score - mean), i.e. 0 for identical scores.
    if first_stage_std == 0:
        first_stage_std = 1.0
    if rank_std == 0:
        rank_std = 1.0
    hybrid_results = []
    for result in results:
        normalized_first_stage_score = (result["first_stage_score"] - first_stage_mean) / first_stage_std
        normalized_rank_score = (result["rank_score"] - rank_mean) / rank_std
        hybrid_results.append({
            **result,
            "hybrid_score": float(alpha * normalized_first_stage_score + (1 - alpha) * normalized_rank_score),
        })
    hybrid_results.sort(key=lambda x: x["hybrid_score"], reverse=True)
    return hybrid_results