File size: 1,577 Bytes
e582bdf |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 |
import re
from typing import Optional, Dict
# Multiplier that converts an amount expressed in the given unit label into
# plain yen. The keys are the literal labels looked up by the functions in
# this module, so they must stay exactly as written.
_UNIT_TABLE: Dict[str, float] = {
    # yen
    "円": 1.0,
    # thousand yen
    "千円": 1_000.0,
    "百万円": 1_000_000.0, # million yen = 1,000,000 yen
    # ten million yen
    "千万円": 10_000_000.0,
    # hundred million yen
    "億円": 100_000_000.0,
}
def detect_unit(text: str) -> Optional[str]:
    """Return the monetary unit label most often declared in *text*.

    A declaration is the word "単位" followed by optional separators
    (whitespace, an ASCII colon or a fullwidth colon), an optional particle
    "は", and one of the labels registered in ``_UNIT_TABLE``. This covers
    forms such as "単位:千円", "(単位: 百万円)" and "単位は億円".

    Args:
        text: Free text, typically extracted from a financial document.

    Returns:
        The label that is declared most frequently. When several labels are
        tied, the one declared earliest in the text wins. ``None`` is
        returned when *text* is empty or contains no recognised declaration.
    """
    if not text:
        return None

    # Build the alternation from the table so every registered label,
    # including the bare "円", can be recognised. Longest labels come first
    # so a longer label is never shadowed by a shorter alternative.
    labels = sorted(_UNIT_TABLE, key=len, reverse=True)
    units = "|".join(map(re.escape, labels))

    # \uff1a is the fullwidth colon. It is written as an escape so that text
    # normalisation of this source file cannot fold it into an ASCII colon.
    pattern = r"単位[\s:\uff1a]*(?:は\s*)?(" + units + r")"

    declared = re.findall(pattern, text)
    if not declared:
        return None

    # dict.fromkeys keeps first-seen order and max() returns the first
    # maximal item, so ties resolve to the earliest declaration.
    return max(dict.fromkeys(declared), key=declared.count)
def unit_factor(unit_label: Optional[str]) -> float:
    """Return the yen multiplier registered for *unit_label*.

    Args:
        unit_label: A unit label such as the ones stored in ``_UNIT_TABLE``,
            or ``None`` when no unit was detected.

    Returns:
        The multiplier from ``_UNIT_TABLE``; ``1.0`` (plain yen) when the
        label is ``None`` or not registered.
    """
    # Unregistered labels and None fall back to plain yen.
    return _UNIT_TABLE.get(unit_label, 1.0)
# Top-level sections whose values are amounts to be rescaled.
_FINANCIAL_SECTIONS = ("balance_sheet", "income_statement", "cash_flows")

# Values that stand for "no amount" and are always mapped to None.
_EMPTY_AMOUNTS = (None, "", "null")


def _parse_amount(value):
    """Convert one extracted amount to float, tolerating statement notation."""
    if isinstance(value, str):
        # float() rejects thousands separators; drop the ASCII comma and the
        # fullwidth comma (\uff0c) before parsing.
        cleaned = value.strip().replace(",", "").replace("\uff0c", "")
        # A leading white or black triangle (\u25b3 / \u25b2) marks a
        # negative amount in Japanese financial statements.
        if cleaned[:1] in ("\u25b3", "\u25b2"):
            cleaned = "-" + cleaned[1:].lstrip()
        return float(cleaned)
    return float(value)


def scale_financials_yen(fin: dict, factor: float) -> dict:
    """Return a copy of *fin* with the statement amounts converted to yen.

    Only the "balance_sheet", "income_statement" and "cash_flows" sections
    are rescaled, and only when they are dicts. Every other entry is carried
    over unchanged (dict values are shallow-copied). The input mapping and
    its section dicts are not modified.

    Args:
        fin: Extracted financial data, with amounts expressed in the unit
            used by the source document.
        factor: Multiplier that converts one source unit into yen.

    Returns:
        A new dict in which every amount is ``float(amount) * factor``.
        ``None``, ``""`` and ``"null"`` become ``None``, and so does any
        value that cannot be converted. Numeric strings may contain
        thousands separators ("1,234") or a leading triangle for negative
        amounts. A falsy *fin* is returned as is.
    """
    if not fin:
        return fin

    # Copy one level deep so that writing into a section never touches the
    # caller's dicts.
    out = {
        key: (value.copy() if isinstance(value, dict) else value)
        for key, value in fin.items()
    }

    for section in _FINANCIAL_SECTIONS:
        source = fin.get(section)
        if not isinstance(source, dict):
            continue
        scaled = out[section]
        # Read from the original section and write to the copy, so the dict
        # being iterated is never the one being assigned to.
        for name, raw in source.items():
            try:
                if raw in _EMPTY_AMOUNTS:
                    scaled[name] = None
                else:
                    scaled[name] = _parse_amount(raw) * factor
            except (TypeError, ValueError, OverflowError):
                # Best effort: an unparsable amount (or an unusable factor)
                # yields None instead of aborting the whole conversion.
                scaled[name] = None
    return out
|