# nlp/accuracy.py
# Added in commit "Create accuracy.py" (+54 -0).
import re

from .detectors import risk_confidence_ratio

# Minimal deterministic text checks: percentage claims and the 4% rule flag.
# (Sum/average recalculation is not implemented yet; see _calc_error_rate.)

# Percentage literal such as "12%" or "7.5 %"; group 1 is the numeric part.
PCT_RE = re.compile(r"(\d{1,3}(?:\.\d+)?)\s*%")
# Plain number with optional thousands separators and decimals ("1,234.56").
NUM_RE = re.compile(r"\b\d+(?:,\d{3})*(?:\.\d+)?\b")


| 11 |
+
def _calc_error_rate(text: str) -> float:
|
| 12 |
+
# Placeholder: no actual recalculation; we penalize suspicious patterns lightly
|
| 13 |
+
# (You can extend to detect inconsistent sums if you pass structured numbers.)
|
| 14 |
+
return 0.0
|
| 15 |
+
|
| 16 |
+
|
def _implausible_return_flag(text: str, *, threshold_pct: float = 12.0) -> int:
    """Flag text that pairs "guaranteed" with an implausibly high percentage.

    Matching is case-insensitive. The word and the percentage only need to
    appear somewhere in the same text; they are not required to be adjacent.

    Args:
        text: Free-form text to inspect.
        threshold_pct: Smallest percentage figure treated as implausible.

    Returns:
        1 if ``text`` contains "guaranteed" and at least one percentage at or
        above ``threshold_pct``; otherwise 0.
    """
    lowered = text.lower()
    if "guaranteed" not in lowered:
        return 0
    # Inspect every percentage rather than only the first one, so the verdict
    # does not depend on whether a smaller figure happens to appear earlier.
    if any(
        float(match.group(1)) >= threshold_pct
        for match in PCT_RE.finditer(lowered)
    ):
        return 1
    return 0


| 25 |
+
def _unqualified_4pct_flag(text: str) -> int:
|
| 26 |
+
t = text.lower()
|
| 27 |
+
if "4%" in t and ("always" in t or "guaranteed" in t):
|
| 28 |
+
return 1
|
| 29 |
+
return 0
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def _omission_penalty(text: str) -> float:
|
| 33 |
+
# If performance touted but no mention of fees/taxes/sequence risk
|
| 34 |
+
t = text.lower()
|
| 35 |
+
perf = ("return" in t or "%" in t)
|
| 36 |
+
hygiene = any(w in t for w in ["fee", "tax", "sequence of returns"])
|
| 37 |
+
return 0.2 if (perf and not hygiene) else 0.0
|
| 38 |
+
|
| 39 |
+
|
def score(text: str) -> dict:
    """Compute the accuracy subscore for ``text`` with its components.

    The subscore starts at 1.0 and is reduced by the calculation error rate,
    by 0.2 for each raised flag, and by 0.1 times the omission penalty. The
    result is clamped to the range [0.0, 1.0].

    Args:
        text: Free-form text to evaluate.

    Returns:
        A dict with the keys ``subscore``, ``calc_error_rate``,
        ``implausible_return_flag``, ``unqualified_4pct_flag`` and
        ``omission_penalty``.
    """
    calc_err = _calc_error_rate(text)
    impl_flag = _implausible_return_flag(text)
    w4_flag = _unqualified_4pct_flag(text)
    omission = _omission_penalty(text)

    raw = 1.0 - calc_err - 0.2 * impl_flag - 0.2 * w4_flag - 0.1 * omission
    # Clamp so that stacked penalties can never push the score out of range.
    sub = max(0.0, min(1.0, raw))
    return {
        'subscore': sub,
        'calc_error_rate': calc_err,
        'implausible_return_flag': impl_flag,
        'unqualified_4pct_flag': w4_flag,
        'omission_penalty': omission,
    }