| import numpy as np | |
# Prompt template for LLM-as-a-judge relevance scoring: the model rates a
# (query, document) pair on an integer 0-10 scale, putting its reasoning in
# <think>...</think> and the final integer in <answer>...</answer>.
# Format placeholders: {instruction} (relevance definition), {query}, {doc}.
prompt_template = """Given a query and a document, please give a relevance score of 0~10.
The goal or relevance definition is: {instruction}
Here is the query:
{query}
Here is the document:
{doc}
After thinking, directly choose a relevance score from [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10].
- 0 represents completely not related
- 10 means perfectly related.
Desired output format:
<think>put your thinking here</think><answer>Only allows an integer here</answer>
Your output:"""
def truncate(tokenizer, text, length):
    """Truncate ``text`` to at most ``length`` tokens.

    Args:
        tokenizer: object exposing ``tokenize`` and
            ``convert_tokens_to_string`` (e.g. a HuggingFace tokenizer —
            assumed interface; confirm against callers).
        text: string to truncate, or None.
        length: maximum number of tokens to keep, or None for no truncation.

    Returns:
        The detokenized string containing the first ``length`` tokens, or
        ``text`` unchanged when either ``text`` or ``length`` is None.
    """
    # `is None` (identity) is the correct sentinel check, not `== None`.
    if length is None or text is None:
        return text
    # Tokenize, keep the first `length` tokens, then detokenize back to text.
    return tokenizer.convert_tokens_to_string(tokenizer.tokenize(text)[:length])
def hybrid_scores(results, alpha):
    """Fuse first-stage retrieval scores with reranker scores via z-score mixing.

    Both score lists are standardized (zero mean, unit variance) and combined:
        hybrid = alpha * z(first_stage_score) + (1 - alpha) * z(rank_score)

    Args:
        results: list of dicts, each with numeric keys "first_stage_score"
            and "rank_score" (other keys are carried through unchanged).
        alpha: weight in [0, 1] given to the normalized first-stage score.

    Returns:
        A new list of shallow-copied dicts with an added "hybrid_score"
        (float) key, sorted by hybrid score in descending order. The input
        dicts are not mutated.
    """
    # Empty input: nothing to normalize (np.mean([]) would warn and yield nan).
    if not results:
        return []
    first_stage_scores = [each["first_stage_score"] for each in results]
    rank_scores = [each["rank_score"] for each in results]
    first_stage_mean, first_stage_std = np.mean(first_stage_scores), np.std(first_stage_scores)
    rank_mean, rank_std = np.mean(rank_scores), np.std(rank_scores)
    # Guard against zero variance (all scores identical, or a single result):
    # dividing by 0 would produce inf/nan hybrid scores. With std forced to 1
    # the normalized score becomes (score - mean), i.e. 0 for identical scores.
    if first_stage_std == 0:
        first_stage_std = 1.0
    if rank_std == 0:
        rank_std = 1.0
    hybrid_results = []
    for result in results:
        normalized_first_stage_score = (result["first_stage_score"] - first_stage_mean) / first_stage_std
        normalized_rank_score = (result["rank_score"] - rank_mean) / rank_std
        hybrid_results.append({
            **result,
            "hybrid_score": float(alpha * normalized_first_stage_score + (1 - alpha) * normalized_rank_score),
        })
    hybrid_results.sort(key=lambda x: x["hybrid_score"], reverse=True)
    return hybrid_results