kaidol-thinking-experiment / voting /elo_calculator.py
developer-lunark's picture
Upload folder using huggingface_hub
7b7257a verified
"""ELO λ ˆμ΄νŒ… μ‹œμŠ€ν…œ"""
import json
from pathlib import Path
from typing import Dict, List, Tuple, Optional
from threading import Lock
class ELOCalculator:
    """Maintain Elo ratings for models and persist them to a JSON file.

    Ratings live in ``self.ratings`` (model name -> rating) and are written
    to ``ratings_path`` after every change. ``self.lock`` serializes the
    whole read-compute-write-save cycle so concurrent votes cannot lose
    updates.
    """

    def __init__(
        self,
        k_factor: float = 32,
        initial_rating: int = 1500,
        ratings_path: str = "elo_ratings.json",
    ):
        """Create a calculator and load any previously saved ratings.

        Args:
            k_factor: Multiplier applied to (actual - expected) score.
            initial_rating: Rating reported for models with no stored rating.
            ratings_path: JSON file the ratings are loaded from and saved to.
        """
        self.k_factor = k_factor
        self.initial_rating = initial_rating
        self.ratings_path = Path(ratings_path)
        self.lock = Lock()
        self.ratings: Dict[str, float] = {}
        self._load_ratings()

    def _load_ratings(self):
        """Load saved ratings from ``ratings_path`` if the file exists.

        An unreadable, undecodable or malformed file results in an empty
        rating table rather than an exception. Entries whose value is not a
        number are dropped so later arithmetic cannot fail on them.
        """
        if not self.ratings_path.exists():
            return
        try:
            with open(self.ratings_path, "r", encoding="utf-8") as f:
                data = json.load(f)
        except (ValueError, OSError):
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            self.ratings = {}
            return
        if not isinstance(data, dict):
            # Valid JSON but not a mapping (e.g. a list): nothing usable.
            self.ratings = {}
            return
        self.ratings = {
            model: rating
            for model, rating in data.items()
            # bool is a subclass of int but is never a meaningful rating.
            if isinstance(rating, (int, float)) and not isinstance(rating, bool)
        }

    def _write_ratings_unlocked(self):
        """Write the ratings to disk; the caller must hold ``self.lock``.

        The JSON is written to a sibling temporary file and then moved over
        the target, so an interrupted write cannot leave a truncated file
        that would be read back as an empty rating table.
        """
        tmp_path = self.ratings_path.with_name(self.ratings_path.name + ".tmp")
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(self.ratings, f, ensure_ascii=False, indent=2)
        tmp_path.replace(self.ratings_path)

    def _save_ratings(self):
        """Save the ratings to ``ratings_path`` while holding the lock.

        ``self.lock`` is not reentrant, so this must not be called by code
        that already holds it; such code uses ``_write_ratings_unlocked``.
        """
        with self.lock:
            self._write_ratings_unlocked()

    def get_rating(self, model: str) -> float:
        """Return the model's current rating, or ``initial_rating`` if unknown."""
        return self.ratings.get(model, self.initial_rating)

    def expected_score(self, rating_a: float, rating_b: float) -> float:
        """Return the expected score of A against B (Elo logistic formula)."""
        return 1 / (1 + 10 ** ((rating_b - rating_a) / 400))

    def update_ratings(
        self,
        model_a: str,
        model_b: str,
        result: str,  # "a", "b", "tie"
    ) -> Tuple[float, float]:
        """Apply one match result and persist the new ratings.

        Args:
            model_a: Name of the first model.
            model_b: Name of the second model.
            result: ``"a"`` if A won, ``"b"`` if B won; any other value is
                scored as a tie.

        Returns:
            The new ratings as ``(new_rating_a, new_rating_b)``.
        """
        # Actual scores for this match.
        if result == "a":
            actual_a, actual_b = 1.0, 0.0
        elif result == "b":
            actual_a, actual_b = 0.0, 1.0
        else:  # tie
            actual_a, actual_b = 0.5, 0.5

        # Read, compute, store and save under one lock acquisition so that
        # two concurrent votes cannot both start from the same stale rating.
        with self.lock:
            rating_a = self.get_rating(model_a)
            rating_b = self.get_rating(model_b)
            expected_a = self.expected_score(rating_a, rating_b)
            expected_b = self.expected_score(rating_b, rating_a)

            new_rating_a = rating_a + self.k_factor * (actual_a - expected_a)
            new_rating_b = rating_b + self.k_factor * (actual_b - expected_b)

            self.ratings[model_a] = new_rating_a
            self.ratings[model_b] = new_rating_b
            self._write_ratings_unlocked()

        return new_rating_a, new_rating_b

    def get_leaderboard(self) -> List[Tuple[str, float]]:
        """Return ``(model, rating)`` pairs sorted by rating, highest first."""
        # Sort a snapshot so a concurrent update cannot change the dict
        # while it is being iterated.
        return sorted(
            self.get_all_ratings().items(),
            key=lambda x: x[1],
            reverse=True,
        )

    def get_leaderboard_with_stats(
        self,
        vote_stats: Dict[str, Dict],
    ) -> List[Dict]:
        """Return the leaderboard with per-model vote statistics attached.

        Args:
            vote_stats: Mapping of model name to a dict that may contain
                ``wins``, ``losses``, ``ties``, ``total`` and ``win_rate``;
                missing models or keys default to 0.

        Returns:
            One dict per rated model, ordered by rating (highest first),
            with the rating rounded and ``win_rate`` formatted as a
            percentage string.
        """
        leaderboard = []
        for model, rating in self.get_leaderboard():
            stats = vote_stats.get(model, {})
            leaderboard.append({
                "model": model,
                "elo": round(rating),
                "wins": stats.get("wins", 0),
                "losses": stats.get("losses", 0),
                "ties": stats.get("ties", 0),
                "total": stats.get("total", 0),
                "win_rate": f"{stats.get('win_rate', 0) * 100:.1f}%",
            })
        return leaderboard

    def get_all_ratings(self) -> Dict[str, float]:
        """Return a shallow copy of all stored ratings."""
        return self.ratings.copy()

    def reset_ratings(self):
        """Clear all ratings and persist the empty table."""
        # Hold the lock so the reset cannot interleave with an update.
        with self.lock:
            self.ratings = {}
            self._write_ratings_unlocked()
# Module-level singleton instance, created lazily on first request.
_elo_calculator: Optional[ELOCalculator] = None


def get_elo_calculator(
    k_factor: float = 32,
    initial_rating: int = 1500,
    ratings_path: str = "elo_ratings.json",
) -> ELOCalculator:
    """Return the shared ELOCalculator, creating it on the first call.

    The arguments are only used when the instance is first created; later
    calls return the existing instance and ignore them.
    """
    global _elo_calculator
    # Fast path: the singleton already exists.
    if _elo_calculator is not None:
        return _elo_calculator
    _elo_calculator = ELOCalculator(
        k_factor=k_factor,
        initial_rating=initial_rating,
        ratings_path=ratings_path,
    )
    return _elo_calculator