Spaces:

Ksgk-fy
/

anno

No application file

App Files Files Community

anno / simSearch /src /sort.py

Ksgk-fy

Upload 67 files

ee657a1 verified about 2 years ago

raw

history blame contribute delete

3.16 kB

	from .pairmatch import pairmatch_baseline
	from typing import List, Tuple, Dict, Any, Optional
	from tqdm import tqdm
	import numpy as np
	# Reference:
	# Judge result -- {0: 'Tie', 1: 'A is better than B', 2: 'A is no better than B'}
	# Insight:
	# Deterministic sorting fails due to the stochasticity of the LLM & the inconsistency of its evaluation results

	# Naive sort, O(N^2) judge calls: scores each conversation by its share of pairwise wins.
	# The pairwise comparison is meant to be permutation invariant, so judging both (i, j)
	# and (j, i) effectively ensembles over the stochasticity of the LLM.
	def naive_sort_sthocastic(conversations: List[str],
	                          sub_objectives: List[str]) -> List[float]:
	    """
	    Score conversations by their share of pairwise wins.

	    Every ordered pair (i, j) with i != j is passed to ``pairmatch_baseline``,
	    so each unordered pair is judged twice (once per ordering). A verdict of 1
	    credits the first conversation of the pair, a verdict of 2 credits the
	    second, and any other verdict (e.g. 0, a tie) credits nobody.

	    Args:
	        conversations: Conversations to rank.
	        sub_objectives: Objectives forwarded unchanged to ``pairmatch_baseline``.

	    Returns:
	        One float per conversation, in input order, summing to 1. If no
	        comparison produced a winner (all ties, or fewer than two
	        conversations) every conversation receives the same share; an empty
	        input yields an empty list.
	    """
	    n = len(conversations)
	    scores = [0] * n
	    for i, conversation_A in enumerate(conversations):
	        for j, conversation_B in enumerate(conversations):
	            if i == j:
	                # Judging a conversation against itself carries no ranking
	                # information; it would only spend a judge call and add noise.
	                continue
	            # NOTE(review): stochastic_bubble_sort consumes the return value of
	            # pairmatch_baseline as a single dict, while this call unpacks a
	            # (judge, info) pair -- confirm which shape the helper really returns.
	            judge, _ = pairmatch_baseline((conversation_A, conversation_B), sub_objectives)
	            if judge == 1:
	                scores[i] += 1
	            elif judge == 2:
	                scores[j] += 1

	    total = sum(scores)
	    if total == 0:
	        # Nothing to normalise by: fall back to a uniform share so the
	        # "scores sum to 1" contract still holds (and [] stays []).
	        return [1.0 / n] * n if n else []
	    # A plain list cannot be divided by a number, so normalise element-wise.
	    return [score / total for score in scores]

	# Stochastic "bubble sort" scoring -- adjacent pairs only, no permuted (B, A) comparison
	def stochastic_bubble_sort(conversations: List[str],
	                           sub_objectives: List[str],
	                           store_path: Optional[str] = None,
	                           name: str = 'scores') -> Dict[str, np.ndarray]:
	    """
	    Score conversations per sub-objective from repeated adjacent-pair judgements.

	    The loops follow the bubble-sort sweep pattern, but nothing is swapped:
	    on sweep ``i`` the adjacent pairs (j, j+1) for j < N-i-1 are judged, so the
	    pair (j, j+1) is judged N-1-j times and ``pairmatch_baseline`` is called
	    N(N-1)/2 times in total. Only the (A, B) ordering is judged; the permuted
	    (B, A) comparison is never requested.

	    For every judged pair and every sub-objective,
	    ``info[sub_objective]['relative_score']`` is read as a two-element
	    sequence whose first entry is added to conversation j and whose second
	    entry is added to conversation j+1.

	    Args:
	        conversations: Conversations to score.
	        sub_objectives: Objectives forwarded to ``pairmatch_baseline`` and used
	            as the keys of the result.
	        store_path: Directory in which to log judgements. When given, the
	            ``info`` dict of every judged pair is appended as one JSON line to
	            ``{store_path}/{name}.json``. ``None`` disables logging.
	        name: Base name (without extension) of the log file.

	    Returns:
	        A dict mapping each sub-objective to a float array with one entry per
	        conversation, normalised to sum to 1. If a sub-objective accumulated
	        a total of zero (e.g. fewer than two conversations) the shares are
	        uniform; an empty input yields empty arrays.
	    """
	    import json  # local import: json is not among the module-level imports

	    n = len(conversations)
	    log_file = None if store_path is None else f'{store_path}/{name}.json'
	    scores = {sub_objective: [0] * n for sub_objective in sub_objectives}

	    for i in tqdm(range(n), desc='Stochastic Bubble Sort w. POE'):
	        for j in range(n - i - 1):
	            print('Begin Pairmatch for ', j, '&', j+1, '...')
	            info = pairmatch_baseline((conversations[j], conversations[j + 1]), sub_objectives)
	            for sub_objective in sub_objectives:
	                judge = info[sub_objective]['relative_score']
	                scores[sub_objective][j] += judge[0]
	                scores[sub_objective][j + 1] += judge[1]
	            if log_file is not None:
	                # Append one JSON record per line; opening with 'w' here would
	                # overwrite the file on every pair and keep only the last one.
	                with open(log_file, 'a', encoding='utf-8') as f:
	                    json.dump(info, f)
	                    f.write('\n')
	            print('Pairmatch completed for ', j, '&', j+1)

	    normalized = {}
	    for sub_objective, raw in scores.items():
	        totals = np.asarray(raw, dtype=float)
	        total = totals.sum()
	        if total == 0:
	            # Avoid 0/0 -> NaN: fall back to a uniform share (empty stays empty).
	            normalized[sub_objective] = np.full(n, 1.0 / n) if n else totals
	        else:
	            normalized[sub_objective] = totals / total
	    return normalized