|
|
import re |
|
|
from typing import Optional, Dict |
|
|
|
|
|
# Multiplier that converts an amount written in the given unit label into yen.
_UNIT_TABLE: Dict[str, float] = {
    "円": 1.0,
    "千円": 1_000.0,
    "百万円": 1_000_000.0,
    "千万円": 10_000_000.0,
    "億円": 100_000_000.0,
}

# Matches a unit declaration: the word "単位", an optional "は", any run of
# ASCII/full-width colons and whitespace, then one of the known unit labels.
# The alternation is generated from _UNIT_TABLE (longest label first) so the
# pattern and the table cannot drift apart.  Full-width punctuation is written
# as \uXXXX escapes so the pattern survives text normalisation of this file.
_UNIT_DECL_RE = re.compile(
    r"単位\s*は?[:\uFF1A\s]*("
    + "|".join(sorted(map(re.escape, _UNIT_TABLE), key=len, reverse=True))
    + r")"
)


def detect_unit(text: str) -> Optional[str]:
    """Return the most frequently declared monetary unit label in ``text``.

    Recognises declarations such as "単位:千円", "単位: 百万円",
    "(単位:千円)" and "単位は億円", with either ASCII or full-width colons
    and parentheses.  Only labels present in ``_UNIT_TABLE`` are considered.

    Args:
        text: Text to scan (for example, text extracted from a PDF).

    Returns:
        The unit label that is declared most often.  When several labels are
        declared equally often, the one that appears first in ``text`` wins.
        ``None`` when ``text`` is empty/``None`` or contains no known unit
        declaration.
    """
    if not text:
        return None

    counts: Dict[str, int] = {}
    for label in _UNIT_DECL_RE.findall(text):
        counts[label] = counts.get(label, 0) + 1
    if not counts:
        return None

    # max() returns the first maximal key and dicts keep insertion order,
    # so ties are resolved in favour of the earliest declaration.
    return max(counts, key=counts.__getitem__)
|
|
|
|
|
def unit_factor(unit_label: Optional[str]) -> float:
    """Return the yen multiplier for ``unit_label``.

    Falls back to ``1.0`` when ``unit_label`` is ``None`` or is not a key of
    ``_UNIT_TABLE``.
    """
    return _UNIT_TABLE.get(unit_label, 1.0)
|
|
|
|
|
def scale_financials_yen(fin: dict, factor: float) -> dict:
    """Convert extracted figures (expressed in the PDF's unit) to yen.

    Only the ``"balance_sheet"``, ``"income_statement"`` and ``"cash_flows"``
    entries are scaled, and only when they are dicts.  Every other top-level
    entry is carried over unchanged.  The input is not mutated: each
    dict-valued top-level entry is shallow-copied before being rewritten.

    Args:
        fin: Mapping of section name to a flat mapping of item name to raw
            value.  A falsy ``fin`` (``None`` or ``{}``) is returned as is.
        factor: Multiplier applied to every numeric value.

    Returns:
        A new dict in which each value of the three sections is
        ``float(value) * factor``, or ``None`` when the value is missing
        (``None``, ``""``, ``"null"``) or cannot be read as a number.
    """
    if not fin:
        return fin

    out = {
        name: (value.copy() if isinstance(value, dict) else value)
        for name, value in fin.items()
    }
    for sec in ("balance_sheet", "income_statement", "cash_flows"):
        section = out.get(sec)
        if not isinstance(section, dict):
            continue
        # Only existing keys are reassigned, so the dict's size never changes
        # during iteration and the copied section keeps its original type.
        for key, raw in section.items():
            section[key] = _scale_value(raw, factor)
    return out


# A number written with strict thousands grouping, e.g. "1,234" or
# "-12,345.6" (ASCII or full-width comma).  Anything looser is left alone so
# that malformed strings still fall through to None.
_GROUPED_NUMBER_RE = re.compile(r"[+-]?\d{1,3}(?:[,\uFF0C]\d{3})+(?:\.\d+)?")


def _scale_value(value, factor):
    """Return ``float(value) * factor``, or None if missing or non-numeric."""
    try:
        if value in (None, "", "null"):
            return None
        if isinstance(value, str):
            stripped = value.strip()
            if _GROUPED_NUMBER_RE.fullmatch(stripped):
                value = stripped.replace(",", "").replace("\uFF0C", "")
        return float(value) * factor
    except (TypeError, ValueError, OverflowError):
        # Best effort: an unreadable cell becomes None instead of aborting
        # the whole conversion.
        return None
|
|
|