Spaces:

d3LLM
/

dLLM_Leaderboard

Running

App Files Files Community

dLLM_Leaderboard / d3LLM_Code /aup_utils.py

d3LLM-Data-LLaDA

Initial commit

d473371 4 days ago

raw

history blame contribute delete

2.99 kB

	# AUP (Accuracy Under Parallelism) measure for parallel decoders
	# See paper for detailed definition and motivation
	import math

	def weight_function(y: float, y_max: float, alpha: float = 3.0) -> float:
	    """Return the quality weight W(y) = min(exp(-alpha * (1 - y / y_max)), 1).

	    Args:
	        y: accuracy value being weighted.
	        y_max: reference accuracy used to normalize ``y``; must be non-zero.
	        alpha: penalty factor applied to the relative shortfall of ``y``
	            (default: 3.0).

	    Returns:
	        The weight, never greater than 1.0.

	    Raises:
	        ZeroDivisionError: if ``y_max`` is zero.
	    """
	    exponent = -alpha * (1 - y / y_max)
	    # A non-negative exponent means exp(exponent) >= 1, so the cap of 1.0
	    # applies. Returning early also keeps math.exp from raising OverflowError
	    # on very large exponents (y far above y_max, or a large alpha).
	    if exponent >= 0:
	        return 1.0
	    return math.exp(exponent)

	def get_aup(rho: list[float], y: list[float], y_max: float, alpha: float = 3.0, y_min_offset: float = 5.0, is_print: bool = False) -> float:
	    """Calculate the Accuracy Under Parallelism (AUP) of parallelism-accuracy pairs.

	    The pairs are sorted by parallelism. Pairs whose accuracy is below
	    ``y_1 - y_min_offset`` are dropped, where ``y_1`` is the accuracy at the
	    lowest parallelism. The score is ``rho_1 * y_1`` (unweighted) plus a
	    trapezoidal sum over the quality-weighted accuracy ``y * W(y)``.

	    Args:
	        rho: list of parallelism values (TPF, tokens per forward)
	        y: list of accuracy values in [0, 100] (percentage)
	        y_max: maximum accuracy across all methods (for normalization)
	        alpha: penalty factor for accuracy degradation (default: 3.0)
	        y_min_offset: minimum accuracy threshold offset (default: 5.0, i.e., 5%)
	        is_print: when True, print the expanded formula and the resulting
	            score to stdout (default: False)

	    Returns:
	        AUP score (scalar value)

	    Raises:
	        AssertionError: if ``rho`` and ``y`` differ in length or are empty, if
	            any ``rho`` is not positive, if the accuracy at the highest
	            parallelism is more than ``y_min_offset`` below the accuracy at
	            the lowest parallelism, or if no pair survives the filter.
	    """
	    assert len(rho) == len(y), "rho and y must have the same length"
	    assert len(rho) > 0, "rho and y must not be empty"
	    assert all(r > 0 for r in rho), "all rho must be positive"

	    # Heuristic only: a value below 1.0 suggests the accuracies were passed as
	    # fractions (0-1) rather than percentages (0-100). Warn but keep going.
	    if any(acc < 1.0 for acc in y):
	        print("\033[91mWarning: Detected accuracy values < 1.0. Please check if accuracy should be in percentage (0-100) instead of (0-1).\033[0m")

	    # Sort by rho only; the sort is stable, so ties keep their input order.
	    sorted_pairs = sorted(zip(rho, y), key=lambda pair: pair[0])

	    # The accuracy at the highest parallelism must stay within y_min_offset of
	    # the accuracy at the lowest parallelism (y_1).
	    y_1 = sorted_pairs[0][1]
	    y_last = sorted_pairs[-1][1]
	    assert y_1 - y_last <= y_min_offset, (
	        f"Accuracy degradation is too large: accuracy at the highest parallelism "
	        f"must be within {y_min_offset:.2f} of the accuracy at the lowest parallelism. "
	        f"Lowest-parallelism Acc: {y_1}, highest-parallelism Acc: {y_last}"
	    )

	    # Drop intermediate points that fall below the threshold y_1 - y_min_offset.
	    y_min = y_1 - y_min_offset
	    filtered_pairs = [(r, acc) for r, acc in sorted_pairs if acc >= y_min]
	    assert len(filtered_pairs) > 0, f"No valid pairs after filtering with y_min={y_min}"

	    # First term is unweighted: rho_1 * y_1.
	    first_rho, first_y = filtered_pairs[0]
	    aup = first_rho * first_y
	    # The formula text is only needed for printing, so build it only then.
	    formula_parts = [f"{first_rho:.2f} * {first_y:.2f}"] if is_print else []

	    # Trapezoidal sum of y * W(y) over each pair of consecutive surviving points.
	    for (rho_prev, y_prev), (rho_i, y_i) in zip(filtered_pairs, filtered_pairs[1:]):
	        w_i = weight_function(y_i, y_max, alpha)
	        w_prev = weight_function(y_prev, y_max, alpha)
	        aup += 0.5 * (rho_i - rho_prev) * (y_i * w_i + y_prev * w_prev)
	        if is_print:
	            # Include the 0.5 factor so the printed expression matches the total.
	            formula_parts.append(
	                f"0.5 * ({rho_i:.2f}-{rho_prev:.2f}) * ({y_i:.2f} * {w_i:.4f} + {y_prev:.2f} * {w_prev:.4f})"
	            )

	    if is_print:
	        print(" AUP = " + " + ".join(formula_parts) + f" = {aup:.2f}")

	    return aup