Spaces:
Sleeping
Sleeping
| import pandera as pa | |
| from pandera import Column, DataFrameSchema, Check | |
| import pandas as pd | |
# Schema for financial statement rows.
# NOTE(review): ``fin_schema`` is assigned a second time further down in this
# file; the later assignment is the one callers actually see at import time.
fin_schema = DataFrameSchema(
    columns={
        # Reporting year; must be 1900 or later.
        "year": Column(int, checks=Check.ge(1900)),
        "quarter": Column(str),
        # Revenue must be non-negative.
        "revenue": Column(float, checks=Check.ge(0)),
        "ebit": Column(float),
        "net_income": Column(float),
        # Balance-sheet figures may be null.
        "total_assets": Column(float, nullable=True),
        "total_equity": Column(float, nullable=True),
    },
    # Ask pandera to cast each column to its declared dtype during validation.
    coerce=True,
)
# Columns that must be present in a financials frame after normalization.
FIN_REQUIRED = [
    "year",
    "quarter",
    "revenue",
    "ebit",
    "net_income",
    "total_assets",
    "total_equity",
]

# Columns that must be present in an ESG frame after normalization.
ESG_REQUIRED = [
    "year",
    "metric",
    "value",
    "unit",
    "scope",
    "notes",
]

# Canonical column name -> accepted source spellings, in priority order.
# Each list includes the canonical name itself, plus English and Japanese
# alternatives.
ALIASES = {
    "revenue": ["revenue", "sales", "売上", "売上高"],
    "ebit": ["ebit", "operating_income", "営業利益"],
    "net_income": ["net_income", "純利益", "profit"],
    "total_equity": ["total_equity", "shareholders_equity", "自己資本"],
}
def normalize_columns(df: pd.DataFrame, required: list) -> pd.DataFrame:
    """Rename known column aliases to canonical names and check required columns.

    For every canonical name in ``ALIASES`` that is not already a column, the
    first alias found in ``df`` (exact match first, then case-insensitive) is
    renamed to the canonical name. Required columns that are present only
    under a different letter case (e.g. ``Year`` for ``year``) are renamed the
    same way.

    Args:
        df: Input frame. It is not modified; a renamed copy is returned when
            any rename is needed, otherwise ``df`` itself is returned.
        required: Column names that must exist after normalization.

    Returns:
        The frame with normalized column names.

    Raises:
        ValueError: If any name in ``required`` is still absent after
            normalization.
    """
    # Lower-cased label -> original label. Non-string labels (e.g. integer
    # headers) have no ``lower()`` and can never match an alias, so skip them
    # instead of crashing.
    by_lower = {c.lower(): c for c in df.columns if isinstance(c, str)}

    # Collect every rename first and apply them in one call; this avoids
    # copying the frame once per alias and keeps the lookup table consistent
    # with the columns being inspected.
    renames = {}

    # Normalize aliases.
    for key, names in ALIASES.items():
        if key in df.columns:
            continue
        for alias in names:
            src = alias if alias in df.columns else by_lower.get(alias.lower())
            # Skip a source column that was already claimed by another key.
            if src is not None and src not in renames:
                renames[src] = key
                break

    # Give required columns the same case-insensitive treatment that aliased
    # columns get, so "Year" satisfies "year" just as "Revenue" satisfies
    # "revenue".
    targets = set(renames.values())
    for name in required:
        if name in df.columns or name in targets or not isinstance(name, str):
            continue
        src = by_lower.get(name.lower())
        if src is not None and src not in renames:
            renames[src] = name
            targets.add(name)

    if renames:
        df = df.rename(columns=renames)

    missing = [c for c in required if c not in df.columns]
    if missing:
        raise ValueError(f"必須列不足: {missing}")
    return df
# Schema for financial statement rows.
# NOTE: ``fin_schema`` is also assigned earlier in this file. This later
# assignment is the one in effect at import time, so it must carry the same
# options as the earlier declaration -- in particular ``coerce=True``, without
# which integer-typed revenue/ebit columns fail the ``float`` dtype checks.
fin_schema = DataFrameSchema(
    {
        # Reporting year; must be 1900 or later.
        "year": Column(int, Check.ge(1900)),
        "quarter": Column(str),
        # Revenue must be non-negative.
        "revenue": Column(float, Check.ge(0)),
        "ebit": Column(float),
        "net_income": Column(float),
        # Balance-sheet figures may be null.
        "total_assets": Column(float, nullable=True),
        "total_equity": Column(float, nullable=True),
    },
    # Cast each column to its declared dtype during validation.
    coerce=True,
)
# Schema for ESG metric rows: one metric value per row.
esg_schema = DataFrameSchema(
    columns={
        # Reporting year; must be 1900 or later.
        "year": Column(int, checks=Check.ge(1900)),
        "metric": Column(str),
        "value": Column(float),
        # Descriptive fields may be null.
        "unit": Column(str, nullable=True),
        "scope": Column(str, nullable=True),
        "notes": Column(object, nullable=True),
    },
)
def validate_financials(df: pd.DataFrame) -> pd.DataFrame:
    """Normalize column names, then validate ``df`` against ``fin_schema``.

    Args:
        df: Raw financials frame.

    Returns:
        The frame returned by ``fin_schema.validate``.

    Raises:
        ValueError: Propagated from ``normalize_columns`` when a column listed
            in ``FIN_REQUIRED`` is missing.
    """
    normalized = normalize_columns(df, FIN_REQUIRED)
    # lazy=True is passed so pandera reports all schema failures together
    # rather than stopping at the first one.
    validated = fin_schema.validate(normalized, lazy=True)
    return validated
def validate_esg(df: pd.DataFrame) -> pd.DataFrame:
    """Normalize column names, then validate ``df`` against ``esg_schema``.

    Args:
        df: Raw ESG frame.

    Returns:
        The frame returned by ``esg_schema.validate``.

    Raises:
        ValueError: Propagated from ``normalize_columns`` when a column listed
            in ``ESG_REQUIRED`` is missing.
    """
    normalized = normalize_columns(df, ESG_REQUIRED)
    # lazy=True is passed so pandera reports all schema failures together
    # rather than stopping at the first one.
    validated = esg_schema.validate(normalized, lazy=True)
    return validated