navaneethkrishnan committed on
Commit
f0f5aab
·
verified ·
1 Parent(s): cb9b663

Create accuracy.py

Browse files
Files changed (1) hide show
  1. nlp/accuracy.py +54 -0
nlp/accuracy.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ from .detectors import \
3
+ risk_confidence_ratio
4
+
5
# Minimal deterministic checks on numeric claims: percentage plausibility and
# the 4% rule flag. (Sum/avg recalculation is still only a placeholder.)

# A number immediately followed by a percent sign, e.g. "12%", "7.5 %".
# Group 1 is the bare number (digits with an optional decimal fraction), so it
# can be passed straight to float(). The integer part is unbounded: capping it
# at three digits would read "1005%" as "005" and understate the figure.
PCT_RE = re.compile(r"(\d+(?:\.\d+)?)\s*%")

# A standalone number with optional thousands separators and an optional
# decimal part, e.g. "7", "1,234", "1,234.56".
NUM_RE = re.compile(r"\b\d+(?:,\d{3})*(?:\.\d+)?\b")
9
+
10
+
11
def _calc_error_rate(text: str) -> float:
    """Return the calculation-error rate for *text* (currently always 0.0).

    Placeholder: nothing is recalculated yet, so no penalty is applied.
    Detecting inconsistent sums would require structured numbers to be
    passed in rather than free text.
    """
    no_error = 0.0
    return no_error
15
+
16
+
17
def _implausible_return_flag(text: str) -> int:
    """Flag text that pairs "guaranteed" with an implausibly high percentage.

    Args:
        text: Free-form text to inspect (matched case-insensitively).

    Returns:
        1 if the text contains "guaranteed" and any percentage matched by
        ``PCT_RE`` is at least 12.0; otherwise 0.
    """
    threshold = 12.0  # percentages at or above this count as implausible
    lowered = text.lower()
    if "guaranteed" not in lowered:
        return 0
    # Inspect every percentage, not just the first: a modest figure early in
    # the text (e.g. a fee) must not mask a later outsized return claim.
    if any(float(m.group(1)) >= threshold for m in PCT_RE.finditer(lowered)):
        return 1
    return 0
23
+
24
+
25
# A standalone "4%" (also "4.0%" or "4 %"). The lookbehind rejects a preceding
# digit or decimal point so that "14%", "24%" and "2.4%" do not count as 4%.
_FOUR_PCT_RE = re.compile(r"(?<![\d.])4(?:\.0+)?\s*%")


def _unqualified_4pct_flag(text: str) -> int:
    """Flag text that presents a 4% figure as certain.

    Args:
        text: Free-form text to inspect (matched case-insensitively).

    Returns:
        1 if the text mentions a standalone 4% figure together with
        "always" or "guaranteed"; otherwise 0.
    """
    lowered = text.lower()
    if not _FOUR_PCT_RE.search(lowered):
        return 0
    return 1 if ("always" in lowered or "guaranteed" in lowered) else 0
30
+
31
+
32
def _omission_penalty(text: str) -> float:
    """Penalize performance talk that omits fees, taxes or sequence risk.

    Returns 0.2 when the lowercased text mentions "return" or "%" but none
    of "fee", "tax" or "sequence of returns"; otherwise 0.0.
    """
    lowered = text.lower()

    # No performance language at all: nothing to penalize.
    if "return" not in lowered and "%" not in lowered:
        return 0.0

    # Any cost/risk caveat is enough to waive the penalty.
    for caveat in ("fee", "tax", "sequence of returns"):
        if caveat in lowered:
            return 0.0

    return 0.2
38
+
39
+
40
def score(text: str):
    """Compute the accuracy subscore for *text* along with its components.

    Starts from 1.0 and subtracts the calculation-error rate, 0.2 for each
    raised flag (implausible return, unqualified 4%), and 0.1 times the
    omission penalty. The result is clamped to the range [0.0, 1.0].

    Returns:
        A dict with keys 'subscore', 'calc_error_rate',
        'implausible_return_flag', 'unqualified_4pct_flag' and
        'omission_penalty'.
    """
    error_rate = _calc_error_rate(text)
    implausible = _implausible_return_flag(text)
    four_pct = _unqualified_4pct_flag(text)
    omitted = _omission_penalty(text)

    raw = 1.0 - error_rate - 0.2 * implausible - 0.2 * four_pct - 0.1 * omitted
    # Clamp: cap at 1.0 first, then floor at 0.0.
    capped = min(1.0, raw)
    subscore = max(0.0, capped)

    return {
        'subscore': subscore,
        'calc_error_rate': error_rate,
        'implausible_return_flag': implausible,
        'unqualified_4pct_flag': four_pct,
        'omission_penalty': omitted,
    }