Buckets:
bbkdevops/unicosys-hypergraph-bucket/tinymind-native-colab-handoff/bundle/evaluation/quality_gates.py
| """Measurable quality gates for training and sparse export.""" | |
| from __future__ import annotations | |
| from collections.abc import Iterable | |
| import re | |
| import torch | |
| def _tokens(text: str) -> set[str]: | |
| return {t.lower() for t in re.findall(r"[\wก-๙]+", text, flags=re.UNICODE) if len(t) > 1} | |
def score_qa_answer(question: str, answer: str) -> float:
    """Heuristically score *answer* against *question* in the range 0.0-1.0.

    The score is the sum of:

    * ``0.5``  times the fraction of question tokens also present in the answer,
    * ``0.45`` times the stripped answer length relative to 180 characters
      (capped at 1.0),
    * a constant ``0.05``,

    minus ``0.25`` when the lowercased answer contains one of the listed
    refusal/uncertainty phrases. The result is clamped to ``[0.0, 1.0]``.
    An answer that is empty after stripping whitespace scores ``0.0``.
    """
    stripped = answer.strip()
    if stripped == "":
        return 0.0

    question_tokens = _tokens(question)
    answer_tokens = _tokens(stripped)
    shared = question_tokens & answer_tokens
    # max(..., 1) avoids dividing by zero when the question has no tokens.
    overlap_ratio = len(shared) / max(len(question_tokens), 1)

    length_ratio = min(len(stripped) / 180.0, 1.0)

    lowered = stripped.lower()
    has_bad_phrase = False
    for phrase in ["as an ai", "i don't know", "ไม่ทราบ", "ไม่แน่ใจ"]:
        if phrase in lowered:
            has_bad_phrase = True
            break

    raw_score = 0.5 * overlap_ratio + 0.45 * length_ratio + 0.05
    if has_bad_phrase:
        raw_score = raw_score - 0.25

    # Clamp into [0.0, 1.0].
    capped = min(1.0, raw_score)
    return max(0.0, capped)
def evaluate_qa_holdout(rows: Iterable[dict], threshold: float = 0.35) -> dict:
    """Score every question/answer row and report whether the set passes.

    Args:
        rows: Iterable of mappings read via ``"question"`` and ``"answer"``
            keys. The iterable is consumed exactly once.
        threshold: Minimum average score required for the gate to pass.

    Returns:
        A dict with ``"count"`` (number of rows scored), ``"average_score"``
        (0 when there are no rows), ``"threshold"`` and ``"passed"``.
        ``"passed"`` is false whenever no rows were scored.
    """

    def _field(row: dict, key: str) -> str:
        # A missing key and an explicit None (e.g. JSON null) both mean
        # "no text". Plain str(None) would yield the literal "None", which
        # scores as a real answer and can even match a "None" question.
        value = row.get(key)
        return "" if value is None else str(value)

    scores = [
        score_qa_answer(_field(row, "question"), _field(row, "answer"))
        for row in rows
    ]
    # max(..., 1) keeps the average defined when no rows were scored.
    avg = sum(scores) / max(len(scores), 1)
    return {
        "count": len(scores),
        "average_score": avg,
        "threshold": threshold,
        "passed": bool(scores) and avg >= threshold,
    }
def compare_tensor_drift(
    baseline: torch.Tensor,
    candidate: torch.Tensor,
    max_mean_abs_delta: float,
) -> dict:
    """Check that *candidate* stays within a mean-absolute-difference budget.

    Both tensors are converted with ``.float()`` before the element-wise
    difference is taken.

    Returns:
        On a shape mismatch: ``{"passed": False, "reason": ..., 
        "mean_abs_delta": inf}``. Otherwise a dict with ``"passed"``
        (whether the measured delta is ``<= max_mean_abs_delta``),
        ``"mean_abs_delta"`` and ``"max_mean_abs_delta"``.
    """
    baseline_shape = tuple(baseline.shape)
    candidate_shape = tuple(candidate.shape)

    if baseline_shape == candidate_shape:
        difference = baseline.float() - candidate.float()
        mean_abs = torch.mean(torch.abs(difference)).item()
        within_budget = mean_abs <= max_mean_abs_delta
        return {
            "passed": within_budget,
            "mean_abs_delta": mean_abs,
            "max_mean_abs_delta": max_mean_abs_delta,
        }

    # Shapes differ, so no element-wise comparison is attempted.
    return {
        "passed": False,
        "reason": f"shape mismatch {baseline_shape} != {candidate_shape}",
        "mean_abs_delta": float("inf"),
    }
Xet Storage Details
- Size: 1.8 kB
- Xet hash: f17f0b3ac691e88279f0a29f7fa2a560dd40e688cc104484946f9a5d7fbecaa9
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.