Spaces:
Sleeping
Sleeping
| from abc import ABC, abstractmethod | |
| from env.models import AuditReport | |
# Default epsilon for strict_score: the value it returns for non-positive or
# unparseable scores, and the margin it leaves below 1.0 for scores >= 1.0.
SCORE_EPS = 1e-6
class BaseTask(ABC):
    """Base class describing the interface of a gradable task.

    The base implementations are empty stubs that return ``None``; subclasses
    are presumably expected to override all three methods.

    NOTE(review): none of the methods is decorated with ``@abstractmethod``,
    so this class (and any subclass that skips a method) can be instantiated.
    Confirm whether that is intentional before adding the decorators.
    """

    def get_description(self) -> str:
        """Return the task description (stub: returns ``None`` here)."""

    def get_table_names(self) -> list[str]:
        """Return the names of the tables the task uses (stub: returns ``None`` here)."""

    def grade(self, report: AuditReport, gold: dict) -> tuple[float, dict]:
        """Grade *report* against *gold* (stub: returns ``None`` here).

        Per the annotation, implementations return a ``(float, dict)`` pair;
        presumably a score and a details mapping -- verify against subclasses.
        """
def brier_adjust(base: float, confidence: float, correct: bool) -> float:
    """Scale *base* down by a Brier-style penalty on the stated confidence.

    The penalty is the squared gap between ``confidence`` and the outcome
    (1.0 when ``correct`` is truthy, otherwise 0.0), weighted by 0.3.

    Args:
        base: Score before the confidence adjustment.
        confidence: Stated confidence. It is not clamped here; for values in
            [0, 1] the multiplier stays within [0.7, 1.0].
        correct: Whether the underlying answer was right.

    Returns:
        ``base * (1.0 - 0.3 * (confidence - outcome) ** 2)``.
    """
    outcome = float(bool(correct))
    squared_gap = (confidence - outcome) ** 2
    penalty = 0.3 * squared_gap
    return base * (1.0 - penalty)
def strict_score(value: float, epsilon: float = SCORE_EPS) -> float:
    """Coerce *value* to a float score lying strictly between 0 and 1.

    Args:
        value: Anything ``float()`` accepts; if the conversion raises, the
            score falls back to ``epsilon``.
        epsilon: Margin kept away from the 0 and 1 boundaries.

    Returns:
        ``epsilon`` when the score is not greater than 0 (including NaN),
        ``1.0 - epsilon`` when it is not less than 1, otherwise the score
        itself.
    """
    try:
        candidate = float(value)
    except Exception:
        # Best effort: anything that cannot be converted counts as the floor.
        candidate = epsilon

    # Written as positive comparisons so NaN (which fails every comparison)
    # falls through to the floor, exactly like a non-positive score.
    if candidate > 0.0:
        if candidate < 1.0:
            return candidate
        return 1.0 - epsilon
    return epsilon
def count_accuracy(reported: int, actual: int, tolerance: float = 0.15) -> float:
    """Score how close a reported count is to the actual count.

    The score is monotonically non-increasing in the relative error:
    errors within ``tolerance`` earn full credit, and larger errors earn
    ``1 - relative_error`` floored at 0.0.

    The previous implementation decayed the score to 0.0 at the edge of the
    tolerance band and then jumped back up to ``1 - relative_error`` just
    outside it (a 15% error scored 0.0 while a 16% error scored 0.84), and
    it divided by ``tolerance`` (ZeroDivisionError for ``tolerance=0`` on an
    exact match).

    Args:
        reported: Count stated in the report.
        actual: Ground-truth count.
        tolerance: Largest relative error that still earns full credit.

    Returns:
        A score in [0.0, 1.0]. When ``actual`` is 0 the score is 1.0 only
        for an exact match, since a relative error is undefined.
    """
    if actual == 0:
        return 1.0 if reported == 0 else 0.0

    # abs() in the denominator keeps the relative error non-negative even if
    # a negative "actual" slips through, so the score cannot exceed 1.0.
    relative_error = abs(reported - actual) / abs(actual)
    if relative_error <= tolerance:
        return 1.0
    return max(0.0, 1.0 - relative_error)