Spaces:
Running
Running
| # AUP (Accuracy Under Parallelism) measure for parallel decoders | |
| # See paper for detailed definition and motivation | |
| import math | |
| def weight_function(y: float, y_max: float, alpha: float = 3.0) -> float: | |
| """Quality-weighting function W(y) = min(exp(-alpha * (1 - y/y_max)), 1)""" | |
| return min(math.exp(-alpha * (1 - y / y_max)), 1.0) | |
| def get_aup(rho: list[float], y: list[float], y_max: float, alpha: float = 3.0, y_min_offset: float = 5.0, is_print: bool = False) -> float: | |
| """ | |
| Calculate the Accuracy Under Parallelism (AUP) of parallelism-accuracy pairs. | |
| Args: | |
| rho: list of parallelism values (TPF, tokens per forward) | |
| y: list of accuracy values in [0, 100] (percentage) | |
| y_max: maximum accuracy across all methods (for normalization) | |
| alpha: penalty factor for accuracy degradation (default: 3.0) | |
| y_min_offset: minimum accuracy threshold offset (default: 5.0, i.e., 5%) | |
| Returns: | |
| AUP score (scalar value) | |
| """ | |
| assert len(rho) == len(y), "rho and y must have the same length" | |
| assert len(rho) > 0, "rho and y must not be empty" | |
| assert all(r > 0 for r in rho), "all rho must be positive" | |
| # Check if y values are in [0, 100] range | |
| if any(acc < 1.0 for acc in y): | |
| print("\033[91mWarning: Detected accuracy values < 1.0. Please check if accuracy should be in percentage (0-100) instead of (0-1).\033[0m") | |
| # Sort by rho | |
| sorted_pairs = sorted(zip(rho, y), key=lambda x: x[0]) | |
| sorted_rho, sorted_y = zip(*sorted_pairs) | |
| sorted_rho, sorted_y = list(sorted_rho), list(sorted_y) | |
| # Filter by y_min threshold (y_1 - y_min_offset) | |
| y_1 = sorted_y[0] | |
| assert y_1 - sorted_y[-1] <= y_min_offset, f"Accuracy degradation is too large: minimum accuracy should be at least {y_min_offset:.2f} lower than the maximum accuracy. Max Acc: {y_1}, min Acc: {sorted_y[-1]}" | |
| y_min = y_1 - y_min_offset | |
| filtered_pairs = [(r, acc) for r, acc in zip(sorted_rho, sorted_y) if acc >= y_min] | |
| assert len(filtered_pairs) > 0, f"No valid pairs after filtering with y_min={y_min}" | |
| filtered_rho, filtered_y = zip(*filtered_pairs) | |
| filtered_rho, filtered_y = list(filtered_rho), list(filtered_y) | |
| # Calculate AUP: first term + trapezoidal sum | |
| aup = filtered_rho[0] * filtered_y[0] | |
| formula_parts = [f"{filtered_rho[0]:.2f} * {filtered_y[0]:.2f}"] | |
| for i in range(1, len(filtered_rho)): | |
| y_i = filtered_y[i] | |
| y_prev = filtered_y[i-1] | |
| w_i = weight_function(y_i, y_max, alpha) | |
| w_prev = weight_function(y_prev, y_max, alpha) | |
| term = 0.5 * (filtered_rho[i] - filtered_rho[i-1]) * (y_i * w_i + y_prev * w_prev) | |
| aup += term | |
| formula_parts.append(f"({filtered_rho[i]:.2f}-{filtered_rho[i-1]:.2f}) * ({y_i:.2f} * {w_i:.4f} + {y_prev:.2f} * {w_prev:.4f})") | |
| if is_print: | |
| formula = f" AUP = " + " + ".join(formula_parts) + f" = {aup:.2f}" | |
| print(formula) | |
| return aup | |