Spaces:
Sleeping
Sleeping
File size: 2,959 Bytes
from tasks.base import BaseTask
from env.models import AuditReport
class Task3(BaseTask):
    """Audit task that grades a report on silent drift between two tables.

    The report is scored on three independent findings, each read from
    ``report.drift_details``: a shift in ``amount``, new values in
    ``category``, and new ``user_id`` values (referential drift).

    ``brier_adjust`` and ``strict_score`` are not defined in this class;
    presumably they are inherited from ``BaseTask`` -- confirm there.
    """

    def get_description(self) -> str:
        """Return the natural-language instructions for this task."""
        return (
            "Compare 'transactions_baseline' (last month) with 'transactions_current' (this month). "
            "Detect silent data drift: mean/distribution shifts in numeric columns, new category "
            "values not present in baseline, and referential drift (new user_ids not in baseline). "
            "Nothing is explicitly labelled wrong — you must find it statistically."
        )

    def get_table_names(self) -> list[str]:
        """Return the names of the two tables this task compares."""
        return ["transactions_baseline", "transactions_current"]

    def grade(self, report: AuditReport, gold: dict) -> tuple[float, dict]:
        """Score ``report`` against ``gold`` and return ``(total, per_check_scores)``.

        Args:
            report: The submitted audit report. Its ``drift_details`` mapping is
                looked up under ``"amount"``, ``"category"`` and ``"user_id"``;
                each entry found is read through ``.value`` and ``.confidence``.
                ``recommended_fixes`` is also scanned for category keywords.
            gold: Ground truth. ``gold["new_categories"]`` is read when a
                category entry is present, and ``gold["referential_drift_pct"]``
                when a user_id entry is present.

        Returns:
            A tuple of the weighted total (rounded to 4 decimals and passed
            through ``strict_score``) and the dict of per-check scores (each
            also passed through ``strict_score``).
        """
        scores: dict[str, float] = {
            "mean_shift": self._score_mean_shift(report),
            "new_cats": self._score_new_categories(report, gold),
            "ref_drift": self._score_referential_drift(report, gold),
        }
        scores = {k: self.strict_score(v) for k, v in scores.items()}
        weights = {"mean_shift": 0.40, "new_cats": 0.35, "ref_drift": 0.25}
        total = sum(scores[k] * weights[k] for k in weights)
        return self.strict_score(round(total, 4)), scores

    def _score_mean_shift(self, report: AuditReport) -> float:
        """Score whether the ``amount`` entry mentions a mean/shift finding."""
        amount_drift = report.drift_details.get("amount")
        if not amount_drift:
            return 0.0
        text = str(amount_drift.value).lower()
        detected = "shift" in text or "mean" in text
        return self.brier_adjust(1.0 if detected else 0.0, amount_drift.confidence, detected)

    def _score_new_categories(self, report: AuditReport, gold: dict) -> float:
        """Score the reported new categories by F1 against ``gold["new_categories"]``."""
        # Fallback signal: the report talks about categories somewhere even if
        # it has no dedicated "category" entry.
        new_cat_mentioned = any(
            "categor" in str(v).lower() or "crypto" in str(v).lower() or "nft" in str(v).lower()
            for v in [report.drift_details, report.recommended_fixes]
        )
        cat_drift = report.drift_details.get("category")
        if not cat_drift:
            return 0.3 if new_cat_mentioned else 0.0

        # The entry's value is treated as a comma-separated list of names.
        reported_cats = {x.strip() for x in str(cat_drift.value).split(",") if x.strip()}
        actual_cats = set(gold["new_categories"])
        hits = len(reported_cats & actual_cats)
        precision = hits / max(len(reported_cats), 1)
        recall = hits / max(len(actual_cats), 1)
        # The 1e-6 floor avoids dividing by zero when nothing matched.
        f1 = 2 * precision * recall / max(precision + recall, 1e-6)
        return self.brier_adjust(f1, cat_drift.confidence, f1 > 0.4)

    def _score_referential_drift(self, report: AuditReport, gold: dict) -> float:
        """Score the reported share of new user_ids against the gold fraction."""
        ref_drift = report.drift_details.get("user_id")
        if not ref_drift:
            return 0.0
        try:
            reported_pct = self._parse_fraction(ref_drift.value)
            actual_pct = float(gold["referential_drift_pct"])
            # Both sides are fractions here, so 0.05 is five percentage points.
            within_5pct = abs(reported_pct - actual_pct) <= 0.05
            return self.brier_adjust(1.0 if within_5pct else 0.5, ref_drift.confidence, within_5pct)
        except Exception:
            # An entry was supplied but could not be parsed or scored:
            # award a small fixed credit instead of failing the whole grade.
            return 0.2

    @staticmethod
    def _parse_fraction(raw: object) -> float:
        """Parse the leading number of ``raw`` and return it as a fraction.

        A number directly followed by ``%`` (optionally after whitespace) is
        always divided by 100, so ``"0.5%"`` is 0.005 and ``"1%"`` is 0.01.
        A bare number is divided by 100 only when it is greater than 1, on the
        assumption that such a value was meant as a percentage.

        Raises:
            IndexError: If ``raw`` contains no token besides ``%``/whitespace.
            ValueError: If the leading token is not a valid float.
        """
        text = str(raw)
        # First run of characters that are neither whitespace nor "%".
        token = text.replace("%", " ").split()[0]
        number = float(token)
        # Only a "%" attached to this token counts; a "%" later in the text
        # (e.g. "0.12 (12% of users)") must not rescale the leading number.
        remainder = text[text.find(token) + len(token):]
        has_percent_sign = remainder.lstrip().startswith("%")
        if has_percent_sign or number > 1:
            number /= 100.0
        return number
|