Spaces:

KurtDu
/

S2S-Bench

Running

App Files Files Community

S2S-Bench / elo_rank.py

KurtDu

Upload elo_rank.py

8efa040 verified over 1 year ago

raw

history blame contribute delete

4.87 kB

	import random
	import json

	class EloRank:
	    """
	    Elo rating tracker for pairwise matches between models.

	    Ratings and win counts are stored in two dictionaries keyed by model ID.
	    """

	    def __init__(self, initial_rating=1000, k_factor=32):
	        """
	        Initialize the EloRank class.
	        :param initial_rating: Initial ELO rating for each model.
	        :param k_factor: The K-factor that determines the sensitivity of rating changes.
	        """
	        self.ratings = {}
	        self.initial_rating = initial_rating
	        self.k_factor = k_factor
	        self.wins = {}

	    def add_model(self, model_id):
	        """
	        Add a model with the initial rating and zero wins.
	        Calling this for an already registered model overwrites its rating
	        and win count.
	        :param model_id: Unique identifier for the model.
	        """
	        self.ratings[model_id] = self.initial_rating
	        self.wins[model_id] = 0

	    def record_match(self, winner, loser):
	        """
	        Update the ratings and the winner's win count based on a match result.
	        :param winner: Model ID of the winner.
	        :param loser: Model ID of the loser.
	        :raises KeyError: If either model has not been added.
	        """
	        rating_winner = self.ratings[winner]
	        rating_loser = self.ratings[loser]

	        # Both expectations are computed from the pre-match ratings,
	        # before either rating is modified.
	        expected_winner = self.expected_score(rating_winner, rating_loser)
	        expected_loser = self.expected_score(rating_loser, rating_winner)

	        # Actual score is 1 for the winner and 0 for the loser.
	        self.ratings[winner] += self.k_factor * (1 - expected_winner)
	        self.ratings[loser] += self.k_factor * (0 - expected_loser)

	        # Update win count
	        self.wins[winner] += 1

	    def expected_score(self, rating_a, rating_b):
	        """
	        Calculate the expected score of model A against model B.
	        :param rating_a: Rating of model A.
	        :param rating_b: Rating of model B.
	        :return: Expected score of model A, between 0 and 1 (0.5 for equal ratings).
	        """
	        return 1 / (1 + 10 ** ((rating_b - rating_a) / 400))

	    def get_rating(self, model_id):
	        """
	        Get the current rating of a model.
	        :param model_id: Unique identifier for the model.
	        :return: Current rating of the model, or None if the model is unknown.
	        """
	        return self.ratings.get(model_id)

	    def get_wins(self, model_id):
	        """
	        Get the number of wins of a model.
	        :param model_id: Unique identifier for the model.
	        :return: Number of wins of the model (0 if the model is unknown).
	        """
	        return self.wins.get(model_id, 0)

	    def get_top_models(self, n=2):
	        """
	        Get the top N models by rating.
	        :param n: Number of top models to retrieve.
	        :return: List of model IDs of the top models, highest rating first.
	        """
	        return sorted(self.ratings, key=self.ratings.get, reverse=True)[:n]

	    def sample_next_match(self):
	        """
	        Sample the next match, drawing each model with probability
	        proportional to its current rating.
	        :return: Tuple of two different model IDs for the next match.
	        :raises ValueError: If fewer than two models have a positive rating,
	            since two different models could then never be drawn.
	        """
	        model_ids = list(self.ratings)
	        # random.choices requires non-negative weights, so a model whose
	        # rating has dropped below zero is given weight 0.
	        weights = [max(self.ratings[model_id], 0) for model_id in model_ids]

	        drawable = sum(1 for weight in weights if weight > 0)
	        if drawable < 2:
	            raise ValueError(
	                "sample_next_match requires at least two models with a "
	                "positive rating"
	            )

	        # Draw pairs until the two models differ. The check above guarantees
	        # that such a pair has non-zero probability, so the loop terminates.
	        while True:
	            first, second = random.choices(model_ids, weights=weights, k=2)
	            if first != second:
	                return (first, second)

	    def process_match_records(self, file_path):
	        """
	        Process match records from a JSON file and update ratings and win counts accordingly.
	        Each record must provide the keys 'winner', 'model_1' and 'model_2'.
	        Records whose 'winner' equals neither model are skipped, although
	        their models are still registered.
	        :param file_path: Path to the JSON file containing match records.
	        :raises KeyError: If a record lacks one of the required keys.
	        """
	        # Read as UTF-8 explicitly so the result does not depend on the
	        # platform's default encoding.
	        with open(file_path, 'r', encoding='utf-8') as file:
	            match_records = json.load(file)

	        for record in match_records:
	            winner = record['winner']
	            model_1 = record['model_1']
	            model_2 = record['model_2']

	            # Add models if they are not already added
	            for model_id in (model_1, model_2):
	                if model_id not in self.ratings:
	                    self.add_model(model_id)

	            # Record the match result
	            if winner == model_1:
	                self.record_match(model_1, model_2)
	            elif winner == model_2:
	                self.record_match(model_2, model_1)

	# # Example Usage
	# e = EloRank()
	# e.add_model('model_A')
	# e.add_model('model_B')
	# e.add_model('model_C')

	# e.record_match('model_A', 'model_B')
	# print(e.get_rating('model_A')) # Should be greater than the initial rating
	# print(e.get_rating('model_B')) # Should be less than the initial rating

	# print(e.get_top_models(2)) # Get the top 2 models
	# print(e.sample_next_match()) # Sample the next match based on ratings

	# # Process match records from a JSON file
	# e.process_match_records('match_records.json')
	# print(e.get_wins('model_A')) # Get the number of wins for model_A