Spaces:

behavior-in-the-wild
/

SDR-Arena

Sleeping

App Files Files Community

SDR-Arena / evaluation /weighted_score.py

behavior-in-the-wild

Deploy SDR-Arena leaderboard

f9e2361 verified 2 months ago

raw

history blame contribute delete

2.23 kB

	"""
	Weighted Coverage Score Calculator.

	Computes weighted coverage scores from Likert (0-5) evaluations.

	Formula:
	score = Sum(item_scores) / (5.0 * count_items)
	Result is 0.0 to 1.0 (multiply by 100 for percentage)

	Adapted from calculate_weighted_coverage.py.
	"""

	from __future__ import annotations

	from typing import Any, Dict, List, Optional


	def calculate_score(coverage_data: Optional[Dict[str, Any]]) -> float:
	    """
	    Calculate the coverage score from Likert evaluations.

	    The score is the sum of the valid item scores divided by the maximum
	    attainable total (5 points per valid item).

	    Args:
	        coverage_data: Dict with an "evaluations" key containing a list of
	            evaluation dicts, each with a "score" field (0-5). An
	            evaluation dict without a "score" key counts as a score of 0.

	    Returns:
	        The normalized score as a float (0.0 to 1.0 when every score lies
	        in the 0-5 range; scores are not clamped). Returns 0.0 if the
	        input is empty or contains no valid evaluations.
	    """
	    if not coverage_data:
	        return 0.0

	    evaluations = coverage_data.get("evaluations", [])
	    if not evaluations:
	        return 0.0

	    total_score = 0
	    count = 0

	    for item in evaluations:
	        # Malformed entries (None, strings, ...) are skipped instead of
	        # raising AttributeError on ``.get``.
	        if not isinstance(item, dict):
	            continue
	        s = item.get("score", 0)
	        # bool is a subclass of int; True/False are not Likert scores.
	        if isinstance(s, bool) or not isinstance(s, (int, float)):
	            continue
	        total_score += s
	        count += 1

	    if count == 0:
	        return 0.0

	    # Max score per item is 5
	    max_possible = count * 5.0
	    return total_score / max_possible


	def calculate_score_percentage(coverage_data: Optional[Dict[str, Any]]) -> float:
	    """
	    Express the coverage score on a percentage scale.

	    Args:
	        coverage_data: Dict with "evaluations" key, passed unchanged to
	            ``calculate_score``.

	    Returns:
	        The value returned by ``calculate_score`` multiplied by 100.0.
	    """
	    fraction = calculate_score(coverage_data)
	    return fraction * 100.0


	def aggregate_scores(scores: List[float]) -> float:
	    """
	    Return the arithmetic mean of the given scores.

	    Args:
	        scores: List of float scores.

	    Returns:
	        The mean of ``scores``, or 0.0 when ``scores`` is empty (falsy).
	    """
	    if scores:
	        return sum(scores) / len(scores)
	    return 0.0


	def find_coverage_keys(data: List[Dict]) -> List[str]:
	    """
	    Find all keys ending with _coverage_scores in the data.

	    Args:
	        data: List of dicts to scan.

	    Returns:
	        Sorted list of the distinct string keys, across all dicts, whose
	        name ends with "_coverage_scores". Non-string keys are ignored.
	    """
	    return sorted(
	        {
	            key
	            for item in data
	            for key in item
	            # Non-string keys have no ``endswith``; skip them.
	            if isinstance(key, str) and key.endswith("_coverage_scores")
	        }
	    )