"""Detect the monetary unit declared in Japanese financial text and scale figures to yen."""
import re
from typing import Dict, Optional

# Multiplier that converts an amount expressed in the given unit into plain yen.
_UNIT_TABLE: Dict[str, float] = {
    "円": 1.0,
    "千円": 1_000.0,
    "百万円": 1_000_000.0,  # one million yen
    "千万円": 10_000_000.0,
    "億円": 100_000_000.0,
}

# Alternation of the known unit labels, longest first, so that e.g. "千万円"
# is never cut short to a shorter label by the regex engine.
_UNIT_ALTERNATION = "|".join(
    re.escape(label) for label in sorted(_UNIT_TABLE, key=len, reverse=True)
)

# "単位" followed by any mix of whitespace, a half-width or full-width
# (U+FF1A) colon, or the particle "は", then one of the known unit labels.
# The pattern is unanchored, so parenthesised forms are matched as well.
_UNIT_PATTERN = re.compile(r"単位[\s:\uff1aは]*(" + _UNIT_ALTERNATION + ")")

# Sections of the financial dict whose values are rescaled.
_FINANCIAL_SECTIONS = ("balance_sheet", "income_statement", "cash_flows")


def detect_unit(text: str) -> Optional[str]:
    """Return the first unit label declared in *text*, or ``None``.

    A declaration is the word "単位" followed by optional separators
    (whitespace, a half-width or full-width colon, or the particle "は")
    and one of the labels in ``_UNIT_TABLE``.  It may appear anywhere in
    the text, including inside parentheses.

    Args:
        text: Text to scan.  Falsy input (``None`` or ``""``) yields ``None``.

    Returns:
        The matched key of ``_UNIT_TABLE``, or ``None`` when no known unit
        is declared.  When several declarations exist, the first one wins.
    """
    if not text:
        return None
    match = _UNIT_PATTERN.search(text)
    return match.group(1) if match else None


def unit_factor(unit_label: Optional[str]) -> float:
    """Return the yen multiplier for *unit_label*.

    Unknown labels and ``None`` fall back to ``1.0``, i.e. plain yen.
    """
    return _UNIT_TABLE.get(unit_label, 1.0)


def _scale_value(value, factor: float) -> Optional[float]:
    """Return ``float(value) * factor``, or ``None`` for blank or non-numeric input."""
    try:
        if value in (None, "", "null"):
            return None
        return float(value) * factor
    except (TypeError, ValueError, OverflowError):
        # Best effort: anything that cannot be read as a number becomes None.
        return None


def scale_financials_yen(fin: dict, factor: float) -> dict:
    """Convert extracted figures (expressed in the source unit) to yen.

    Args:
        fin: Mapping that may contain the sections ``balance_sheet``,
            ``income_statement`` and ``cash_flows``, each a dict of values.
            Other keys are carried over unchanged.
        factor: Multiplier applied to every value in those sections.

    Returns:
        A new dict in which each section value is ``float(value) * factor``,
        or ``None`` when the value is ``None``, ``""``, ``"null"`` or cannot
        be converted to a float.  A falsy *fin* is returned as is.  The input
        is not modified: dict values are copied before being rewritten.
    """
    if not fin:
        return fin
    out = {
        key: (value.copy() if isinstance(value, dict) else value)
        for key, value in fin.items()
    }
    for section in _FINANCIAL_SECTIONS:
        values = out.get(section)
        if not isinstance(values, dict):
            continue
        # Only existing keys are reassigned, so the dict size never changes
        # while it is being iterated.
        for key, raw in values.items():
            values[key] = _scale_value(raw, factor)
    return out