Corin1998 committed on
Commit
e582bdf
·
verified ·
1 Parent(s): 2394322

Create units.py

Browse files
Files changed (1) hide show
  1. core/units.py +45 -0
core/units.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ from typing import Optional, Dict
3
+
4
# Multiplier that converts an amount stated in each unit label into plain yen.
_UNIT_TABLE: Dict[str, float] = {
    "円": 1.0,
    "千円": 1_000.0,
    "百万円": 1_000_000.0,  # one million yen
    "千万円": 10_000_000.0,
    "億円": 100_000_000.0,
}

# Matches a unit declaration: "単位", any run of ASCII/full-width colons and
# whitespace, an optional "は", then one of the known labels.  Labels are
# tried longest first so the alternation never settles for a shorter label
# when a longer one fits at the same position.
_UNIT_DECLARATION_RE = re.compile(
    r"単位[:\uFF1A\s]*(?:は\s*)?("
    + "|".join(
        re.escape(label)
        for label in sorted(_UNIT_TABLE, key=len, reverse=True)
    )
    + ")"
)


def detect_unit(text: str) -> Optional[str]:
    """Return the most frequently declared monetary unit found in *text*.

    Recognises declarations such as "単位:千円", "単位: 百万円",
    "(単位:千円)" and "単位は億円", with either ASCII or full-width colons
    and parentheses.  Only labels present in ``_UNIT_TABLE`` are considered.

    Args:
        text: Text to scan; may be empty or ``None``.

    Returns:
        The label declared most often.  When several labels are tied, the
        one that appears first in *text* wins.  ``None`` when *text* is
        empty or declares no known unit.
    """
    if not text:
        return None

    counts: Dict[str, int] = {}
    for label in _UNIT_DECLARATION_RE.findall(text):
        counts[label] = counts.get(label, 0) + 1
    if not counts:
        return None

    # dicts keep insertion order and max() returns the first maximal item,
    # so a tie resolves to the label that was seen first.
    return max(counts, key=counts.get)


def unit_factor(unit_label: Optional[str]) -> float:
    """Return the yen multiplier for *unit_label*.

    Unknown labels and ``None`` fall back to 1.0, i.e. the amounts are
    treated as already being in yen.
    """
    return _UNIT_TABLE.get(unit_label, 1.0)
32
+
33
# Top-level keys of the financial dict whose values get rescaled.
_STATEMENT_SECTIONS = ("balance_sheet", "income_statement", "cash_flows")

# Unsigned or signed number, optionally with strict 3-digit comma grouping.
_GROUPED_NUMBER_RE = re.compile(
    r"([+-]?)((?:\d{1,3}(?:,\d{3})+|\d+)(?:\.\d+)?)"
)
_NEGATIVE_MARKS = ("\u25b3", "\u25b2")  # white / black up-pointing triangle
_OPEN_PARENS = ("(", "\uff08")
_CLOSE_PARENS = (")", "\uff09")


def _parse_amount(value):
    """Convert one extracted cell to ``float``; return ``None`` for "no value".

    Raises ``ValueError`` / ``TypeError`` / ``OverflowError`` when the cell
    holds something that is not a number.
    """
    if value is None:
        return None
    if not isinstance(value, str):
        return float(value)

    text = value.strip()
    if text in ("", "null"):
        return None
    try:
        # Anything float() already understands keeps its exact old result.
        return float(text)
    except ValueError:
        pass

    # Accounting notation: a leading triangle or surrounding parentheses
    # mark a negative amount.
    negative = False
    if text.startswith(_NEGATIVE_MARKS):
        negative, text = True, text[1:].strip()
    elif text.startswith(_OPEN_PARENS) and text.endswith(_CLOSE_PARENS):
        negative, text = True, text[1:-1].strip()

    match = _GROUPED_NUMBER_RE.fullmatch(text)
    # A sign combined with a negative marker is ambiguous; refuse to guess.
    if match is None or (negative and match.group(1)):
        raise ValueError(f"not a number: {value!r}")
    number = float(match.group(1) + match.group(2).replace(",", ""))
    return -number if negative else number


def scale_financials_yen(fin: dict, factor: float) -> dict:
    """Return a copy of *fin* with the statement amounts multiplied by *factor*.

    Only the "balance_sheet", "income_statement" and "cash_flows" entries are
    rescaled, and only when they are dicts; every other key is carried over
    unchanged.  The input dict and its section dicts are not modified.

    Args:
        fin: Extracted financial data.  A falsy value is returned as is.
        factor: Multiplier applied to every amount (e.g. 1000.0 for 千円).

    Returns:
        A new dict whose section values are floats, or ``None`` where the
        source value was ``None``, ``""``, ``"null"`` or could not be read
        as a number.  Numeric strings may use thousands separators
        ("1,234") and accounting negatives ("△500", "(1,000)").
    """
    if not fin:
        return fin

    scaled = {
        key: (value.copy() if isinstance(value, dict) else value)
        for key, value in fin.items()
    }
    for section in _STATEMENT_SECTIONS:
        rows = scaled.get(section)
        if not isinstance(rows, dict):
            continue
        # Only existing keys are reassigned, so iterating while writing is safe.
        for name, raw in rows.items():
            try:
                amount = _parse_amount(raw)
                rows[name] = None if amount is None else amount * factor
            except (TypeError, ValueError, OverflowError):
                # Best effort: an unreadable cell becomes "no value".
                rows[name] = None
    return scaled