Spaces:
Sleeping
Sleeping
File size: 2,316 Bytes
5823ed6 ff2c62b 5823ed6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 |
import pandera as pa
from pandera import Column, DataFrameSchema, Check
import pandas as pd
# Schema for a financial table: one row carries a year, a quarter label and
# the numeric figures below. `coerce=True` asks pandera to cast each column to
# its declared dtype before running the checks.
# NOTE: the name `fin_schema` is assigned again further down in this module;
# the later assignment is the one the validators see at call time.
fin_schema = DataFrameSchema(
    columns={
        "year": Column(dtype=int, checks=Check.greater_than_or_equal_to(1900)),
        "quarter": Column(dtype=str),
        "revenue": Column(dtype=float, checks=Check.greater_than_or_equal_to(0)),
        "ebit": Column(dtype=float),
        "net_income": Column(dtype=float),
        # Balance-sheet figures may be absent for a given row.
        "total_assets": Column(dtype=float, nullable=True),
        "total_equity": Column(dtype=float, nullable=True),
    },
    coerce=True,
)
# Column names a financial table must expose once aliases have been resolved.
FIN_REQUIRED = [
    "year",
    "quarter",
    "revenue",
    "ebit",
    "net_income",
    "total_assets",
    "total_equity",
]

# Column names an ESG table must expose.
ESG_REQUIRED = [
    "year",
    "metric",
    "value",
    "unit",
    "scope",
    "notes",
]
# Accepted alternative headers for each canonical column name, tried in order.
# Every list starts with the canonical name itself so that a differently-cased
# header (e.g. "Revenue") is also mapped onto the canonical spelling.
ALIASES = {
    "revenue": ["revenue", "sales", "売上", "売上高"],
    "ebit": ["ebit", "operating_income", "営業利益"],
    "net_income": ["net_income", "純利益", "profit"],
    "total_equity": ["total_equity", "shareholders_equity", "自己資本"],
}


def normalize_columns(df: pd.DataFrame, required: list) -> pd.DataFrame:
    """Rename recognised column headers to canonical names and check `required`.

    For every canonical name in ``ALIASES`` that is not already a column, the
    first alias found among the columns (exact match first, then
    case-insensitive) is renamed to the canonical name. Required names that
    have no ``ALIASES`` entry are matched case-insensitively as well, so
    ``"Year"`` satisfies ``"year"``.

    Args:
        df: Input frame. It is never modified in place.
        required: Canonical column names that must exist after renaming.

    Returns:
        ``df`` itself when nothing had to be renamed, otherwise a renamed copy.

    Raises:
        ValueError: If any name in ``required`` is still absent after renaming.
    """
    # Only string labels can be compared case-insensitively; other labels
    # (e.g. the integer headers of a header-less CSV) are simply not candidates
    # instead of crashing on `.lower()`.
    by_lower = {c.lower(): c for c in df.columns if isinstance(c, str)}
    present = set(df.columns)

    # Normalize aliases. Alias-backed names come first, then the remaining
    # required names, whose only candidate is their own spelling.
    targets = list(ALIASES) + [c for c in required if c not in ALIASES]
    renames = {}
    for key in targets:
        if key in present:
            continue
        for name in ALIASES.get(key, [key]):
            if name in present:
                src = name
            elif isinstance(name, str):
                src = by_lower.get(name.lower())
            else:
                src = None
            # Skip candidates that do not exist or were already claimed by an
            # earlier canonical name.
            if src is None or src in renames:
                continue
            renames[src] = key
            present.discard(src)
            present.add(key)
            break

    # Apply all renames in one pass rather than rebuilding the frame per hit.
    if renames:
        df = df.rename(columns=renames)

    missing = [c for c in required if c not in df.columns]
    if missing:
        raise ValueError(f"必須列不足: {missing}")
    return df
# Effective financial schema: this assignment is the binding that
# validate_financials() resolves at call time.
# coerce=True makes pandera cast each column to its declared dtype before the
# checks run, so integer-valued figures (common when the table is read from
# CSV/Excel) are accepted as float instead of failing the dtype check. This
# matches the earlier assignment of the same name in this module.
fin_schema = DataFrameSchema(
    {
        "year": Column(int, Check.ge(1900)),
        "quarter": Column(str),
        "revenue": Column(float, Check.ge(0)),
        "ebit": Column(float),
        "net_income": Column(float),
        # Balance-sheet figures may be absent for a given row.
        "total_assets": Column(float, nullable=True),
        "total_equity": Column(float, nullable=True),
    },
    coerce=True,
)
# Schema for an ESG table: one measured value per row, identified by year and
# metric name. Unit, scope and notes are optional per row (nullable).
esg_schema = DataFrameSchema(
    columns={
        "year": Column(dtype=int, checks=Check.greater_than_or_equal_to(1900)),
        "metric": Column(dtype=str),
        "value": Column(dtype=float),
        "unit": Column(dtype=str, nullable=True),
        "scope": Column(dtype=str, nullable=True),
        "notes": Column(dtype=object, nullable=True),
    }
)
def validate_financials(df: pd.DataFrame) -> pd.DataFrame:
    """Normalize column names of a financial table and validate it.

    Column headers are first passed through ``normalize_columns`` against
    ``FIN_REQUIRED`` (which raises ``ValueError`` when a required column is
    missing); the result is then validated with ``fin_schema``. Validation
    runs with ``lazy=True``, i.e. pandera is asked to collect all failures
    before raising rather than stopping at the first one.

    Returns:
        The frame returned by ``fin_schema.validate``.
    """
    normalized = normalize_columns(df, FIN_REQUIRED)
    validated = fin_schema.validate(normalized, lazy=True)
    return validated
def validate_esg(df: pd.DataFrame) -> pd.DataFrame:
    """Normalize column names of an ESG table and validate it.

    ``normalize_columns`` is applied against ``ESG_REQUIRED`` (raising
    ``ValueError`` when a required column is missing), then the result is
    validated with ``esg_schema`` using ``lazy=True`` so that pandera is
    asked to report all failures together.

    Returns:
        The frame returned by ``esg_schema.validate``.
    """
    return esg_schema.validate(normalize_columns(df, ESG_REQUIRED), lazy=True)
|