File size: 1,577 Bytes
e582bdf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import re
from typing import Optional, Dict

# Multiplier that converts an amount stated in each unit label into plain yen.
_UNIT_TABLE: Dict[str, float] = {
    "円": 1.0,
    "千円": 1_000.0,
    "百万円": 1_000_000.0,   # one million yen
    "千万円": 10_000_000.0,
    "億円": 100_000_000.0,
}

# A unit declaration: the word "単位", then any mix of ASCII colon, full-width
# colon (U+FF1A) and whitespace, optionally the particle "は", then one of the
# labels from _UNIT_TABLE.  Labels are tried longest-first so that a label
# which is a prefix of another can never shadow the longer one.
_UNIT_DECLARATION_RE = re.compile(
    r"単位[:\uFF1A\s]*(?:は\s*)?("
    + "|".join(
        re.escape(label)
        for label in sorted(_UNIT_TABLE, key=len, reverse=True)
    )
    + ")"
)


def detect_unit(text: str) -> Optional[str]:
    """Return the monetary unit label declared most often in ``text``.

    Recognised declarations look like "単位:千円", "単位: 百万円",
    "(単位:千円)" or "単位は億円"; the colon may be ASCII or full-width.
    Only labels that are keys of ``_UNIT_TABLE`` are considered, so a
    declaration of an unknown unit is skipped without hiding a later,
    valid declaration.

    Args:
        text: Text to scan, e.g. the text extracted from a document.

    Returns:
        The most frequently declared label. When several labels are tied,
        the one that appears first in ``text`` wins. ``None`` when ``text``
        is empty/``None`` or contains no recognised declaration.
    """
    if not text:
        return None

    # Tally every recognised declaration.  A dict keeps insertion order, which
    # is what makes the tie-break below deterministic.
    counts: Dict[str, int] = {}
    for label in _UNIT_DECLARATION_RE.findall(text):
        counts[label] = counts.get(label, 0) + 1
    if not counts:
        return None

    # max() returns the first maximal key it encounters, i.e. the label that
    # was seen earliest among those with the highest count.
    return max(counts, key=counts.get)

def unit_factor(unit_label: Optional[str]) -> float:
    """Return the yen multiplier registered for ``unit_label``.

    Any label that is not a key of ``_UNIT_TABLE`` (``None`` included) yields
    ``1.0``, i.e. the amount is treated as already being in plain yen.
    """
    # Single lookup; the fallback of 1.0 means "plain yen".
    return _UNIT_TABLE.get(unit_label, 1.0)

def _scale_amount(raw: object, factor: float) -> Optional[float]:
    """Convert one extracted figure to a float and multiply it by ``factor``.

    Returns ``None`` for ``None``, for blank or ``"null"`` strings, and for
    anything that cannot be converted to a number.
    """
    if raw is None:
        return None
    if isinstance(raw, str):
        # Drop surrounding whitespace and thousands separators
        # (ASCII comma and full-width comma U+FF0C).
        text = raw.strip().replace(",", "").replace("\uFF0C", "")
        if text in ("", "null"):
            return None
        # Japanese statements mark negatives with a leading triangle
        # (U+25B3 / U+25B2); also accept the Unicode minus sign (U+2212)
        # and the full-width hyphen-minus (U+FF0D).
        if text[0] in "\u25B3\u25B2\u2212\uFF0D":
            text = "-" + text[1:].lstrip()
        raw = text
    try:
        return float(raw) * factor
    except (TypeError, ValueError, OverflowError):
        # Not a number (or too large for a float): report "no value".
        return None


def scale_financials_yen(fin: dict, factor: float) -> dict:
    """Convert extracted figures from the document's unit into yen.

    Every value inside the ``balance_sheet``, ``income_statement`` and
    ``cash_flows`` sub-dicts is converted to ``float`` and multiplied by
    ``factor``. Numeric strings may carry thousands separators ("1,234")
    and a leading negative marker such as "△". Values that are ``None``,
    blank, ``"null"`` or not numeric become ``None``.

    Args:
        fin: Mapping of extracted data. Keys other than the three sections
            above are carried over unchanged.
        factor: Multiplier to apply to each figure.

    Returns:
        A new dict; dict-valued entries are shallow-copied, so ``fin`` and
        its section dicts are not modified. A falsy ``fin`` (``None`` or
        empty) is returned as is.
    """
    if not fin:
        return fin

    # Copy one level deep so the in-place writes below never touch the input.
    scaled = {
        key: (value.copy() if isinstance(value, dict) else value)
        for key, value in fin.items()
    }
    for section in ("balance_sheet", "income_statement", "cash_flows"):
        figures = scaled.get(section)
        if not isinstance(figures, dict):
            continue
        # Only existing keys are reassigned, so iterating while writing is safe.
        for name, raw in figures.items():
            figures[name] = _scale_amount(raw, factor)
    return scaled