|
|
|
|
|
from __future__ import annotations |
|
|
import re |
|
|
from typing import Dict, Any, Tuple, List |
|
|
|
|
|
|
|
|
# Multiplier that converts an amount expressed in the keyed unit into plain
# yen.  Keys are either Japanese unit labels or lower-case English phrases.
UNIT_SCALE = {
    # Japanese unit labels.
    "円": 1.0,
    "千円": 1e3,
    "万円": 1e4,
    "百万円": 1e6,
    "千万円": 1e7,
    "億円": 1e8,
    # 十億 is ten times 億 (10 x 10^8), i.e. 10^9 -- not 10^10.
    "十億円": 1e9,
    # English phrasings of "thousands" / "millions" of yen.
    "thousands of yen": 1e3,
    "thousand yen": 1e3,
    "in thousands of yen": 1e3,
    "millions of yen": 1e6,
    "in millions of yen": 1e6,
    "jpy in thousands": 1e3,
    "jpy in millions": 1e6,
}
|
|
|
|
|
|
|
|
# Regular expressions that locate a unit declaration in free text.
# Every pattern has exactly one capturing group, and that group holds the
# unit token (a Japanese unit label, or "thousands" / "millions").
_PATTERNS = [
    # Japanese "単位" header.  The separator between "単位" and the unit may be
    # an ASCII colon, a full-width colon (U+FF1A, written as an escape so it
    # survives text normalisation) or whitespace.
    r"単位[:\uff1a\s]*?(千円|百万円|万円|円|千万円|億円|十億円)",
    # "Amounts in thousands of yen", "Figures in millions of yen", ...
    r"(?:Amounts?|Figures?)\s+in\s+(thousands|millions)\s+of\s+yen",
    # "JPY in thousands", "YEN in millions"
    r"(?:JPY|YEN)\s+in\s+(thousands|millions)",
    # Bare "(in) thousands of yen" / "(in) millions of yen"
    r"(?:in\s+)?(thousands|millions)\s+of\s+yen",
]
|
|
|
|
|
def detect_unit_scale(text: str) -> Tuple[float, str, List[str]]:
    """
    Infer the monetary unit declared in ``text``.

    Returns ``(scale, label, hits)``:

    * ``scale`` -- multiplier looked up in ``UNIT_SCALE`` for ``label``
      (1.0 when the label is not in the table).
    * ``label`` -- the winning unit as a Japanese label; English
      "thousands"/"millions" hits are reported as "千円"/"百万円".
    * ``hits``  -- every unit mention found, in pattern order and then text
      order.  English mentions are recorded as "thousands of yen" or
      "millions of yen".

    When nothing is found, or ``text`` is empty or ``None``, the result is
    ``(1.0, "円", [])``.

    With several mentions the unit is chosen by majority vote; on a tie the
    unit that was encountered first wins.
    """
    if not text:
        return 1.0, "円", []

    from collections import Counter

    japanese_units = ("千円", "百万円", "万円", "円", "千万円", "億円", "十億円")
    # English hit labels and the Japanese label they stand for.
    canonical = {
        "thousands of yen": "千円",
        "thousand yen": "千円",
        "in thousands of yen": "千円",
        "jpy in thousands": "千円",
        "millions of yen": "百万円",
        "in millions of yen": "百万円",
        "jpy in millions": "百万円",
    }

    hits: List[str] = []
    # Several patterns can match the same words (e.g. "Amounts in millions of
    # yen" satisfies both the "Amounts in ..." and the bare "... of yen"
    # pattern).  Remember the span of each unit token so that one mention in
    # the text casts exactly one vote.
    seen_spans = set()
    for pat in _PATTERNS:
        for m in re.finditer(pat, text, flags=re.IGNORECASE):
            span = m.span(1)
            if span in seen_spans:
                continue
            token = m.group(1).lower()
            if token in japanese_units:
                hit = token
            elif token in ("thousands", "thousand"):
                hit = "thousands of yen"
            elif token in ("millions", "million"):
                hit = "millions of yen"
            else:
                continue
            seen_spans.add(span)
            hits.append(hit)

    if not hits:
        return 1.0, "円", []

    # Vote on the canonical (Japanese) label so that "千円" and
    # "thousands of yen" support the same candidate.  Counter keeps
    # first-encountered order among equal counts, which gives the
    # "first one wins" tie-break.
    votes = Counter(canonical.get(hit, hit) for hit in hits)
    label = votes.most_common(1)[0][0]

    scale = UNIT_SCALE.get(label, 1.0)
    return scale, label, hits
|
|
|
|
|
def _scale_number(v: Any, scale: float) -> Any:
    """
    Multiply ``v`` by ``scale`` when ``float(v)`` succeeds.

    ``None`` yields ``None``.  A value that ``float()`` cannot convert is
    handed back unchanged.  Otherwise the result is ``float(v) * scale``.
    """
    result = v
    if v is not None:
        try:
            as_float = float(v)
        except Exception:
            # Not numeric: fall through and return the input untouched.
            pass
        else:
            result = as_float * scale
    return result
|
|
|
|
|
def apply_unit_scale(fin: Dict[str, Any], scale: float) -> Dict[str, Any]:
    """
    Apply the unit conversion to the extracted JSON, i.e. to the values of
    the ``balance_sheet`` / ``income_statement`` / ``cash_flows`` sections.

    Ratios may be mixed in with the amounts, so when ``scale`` is greater
    than 1 any value in the narrow range -1..1 is left unscaled (returned as
    a float) to avoid a wrong conversion.  ``None`` and values that
    ``float()`` cannot convert are kept as they are.

    The input is not modified: top-level dict values are shallow-copied
    before conversion.  A section that is absent from ``fin`` appears in the
    result as an empty dict; a section whose value is not a dict is left
    untouched.
    """
    def _convert(value):
        if value is None:
            return None
        try:
            number = float(value)
        except Exception:
            return value
        looks_like_ratio = -1.0 <= number <= 1.0 and scale > 1
        return number if looks_like_ratio else number * scale

    result = {}
    for key, value in fin.items():
        result[key] = value.copy() if isinstance(value, dict) else value

    for section in ("balance_sheet", "income_statement", "cash_flows"):
        table = result.get(section, {})
        if not isinstance(table, dict):
            continue
        # Keys are only reassigned, never added or removed.
        for name, item in list(table.items()):
            table[name] = _convert(item)
        result[section] = table
    return result
|
|
|