Spaces:
Runtime error
Runtime error
| import pandas as pd | |
| from dataclasses import dataclass | |
| from pathlib import Path | |
| from src.features.construction_age_band_sap import normalize_construction_age_band, windows_feature_engineering_vectorised | |
| from src.features.energy_system import energy_system_feature_engineering_vectorised | |
| from src.features.floor import floor_feature_engineering_fast | |
| from src.features.walls import wall_feature_engineering | |
| from src.features.roofs import roof_feature_engineering | |
def build_age_band_lookup(series: pd.Series):
    """
    Map every distinct, non-null value of *series* to its SAP age band.

    Each distinct value is normalised once via
    ``normalize_construction_age_band`` and stored as
    ``raw value -> (sap_band_letter, sap_band_label)``.
    An empty or all-null series yields an empty dict.
    """

    def _as_pair(raw_band):
        # Unpack explicitly so a result that is not exactly a pair fails here.
        sap_letter, sap_label = normalize_construction_age_band(raw_band)
        return (sap_letter, sap_label)

    distinct_bands = series.dropna().unique()
    return {raw_band: _as_pair(raw_band) for raw_band in distinct_bands}
def age_band_to_sap_letter(df: pd.DataFrame) -> pd.DataFrame:
    """
    Return a copy of *df* with SAP age-band columns attached.

    The distinct values of ``CONSTRUCTION_AGE_BAND`` are normalised once via
    ``build_age_band_lookup``; the resulting table is then joined back onto
    the rows, adding ``sap_band_letter`` and ``sap_band_label``.
    The input frame is not modified.
    """
    band_lookup = build_age_band_lookup(df["CONSTRUCTION_AGE_BAND"])

    # One row per raw age-band value, indexed by that raw value.
    sap_bands = pd.DataFrame.from_dict(
        band_lookup,
        orient="index",
        columns=["sap_band_letter", "sap_band_label"],
    )

    return df.copy().join(sap_bands, on="CONSTRUCTION_AGE_BAND")
# Numeric value assigned to each (lower-cased) MAINHEAT_ENERGY_EFF rating
# when EPCFeatureEngineer.transform builds MAINHEAT_EFF_NUM.
EFF_MAP = {
    "very poor": 0.60,
    "poor": 0.68,
    "average": 0.75,
    "good": 0.85,
    "very good": 0.92,
}
# Numeric value assigned to each (lower-cased) HOT_WATER_ENERGY_EFF rating
# when EPCFeatureEngineer.transform builds HOT_WATER_ENERGY_NUM.
DHW_EFF_MAP = {
    "very poor": 0.65,
    "poor": 0.72,
    "average": 0.78,
    "good": 0.85,
    "very good": 0.90,
}
# Heating, hot water, ventilation, lighting and PV related columns.
energy_system_columns = [
    "MAIN_HEATING_SYSTEM",
    "SECONDARY_HEATING_SYSTEM",
    "MAIN_FUEL_TYPE",
    "DHW_SUPPLY_SYSTEM",
    "VENTILATION_SYSTEM",
    "LIGHTING_FRACTION_LOW_ENERGY",
    "PV_KWP",
    "MAINHEAT_EFF_NUM",
    "ROOF_MM_S9",
    "HOT_WATER_ENERGY_NUM",
]

# Floor, wall, roof and glazing columns.
envelop_columns = [
    "FLOOR_U_VALUE",
    "FLOOR_INSULATION_TYPE",
    "FLOOR_BOUNDARY_TYPE",
    "WALL_U_VALUE",
    "WALL_TYPE",
    "WALL_INSULATION_MODEL",
    "ROOF_U_VALUE",
    "ROOF_CLASS",
    "ROOF_INSULATION_TYPE",
    "glazing_area_m2",
    "glazing_type",
]

# Whole-dwelling descriptors.
general_details = [
    "PROPERTY_TYPE",
    "TOTAL_FLOOR_AREA",
    "BUILT_FORM",
    "sap_band_letter",
    "FLOOR_HEIGHT",
]

# Columns (and their order) returned by EPCFeatureEngineer.transform.
features = [*energy_system_columns, *envelop_columns, *general_details]
# Categorical feature columns: EPCFeatureEngineer.transform fills their
# missing values with "UNKNOWN" and casts them to str.
cat_cols = [
    # energy systems
    "MAIN_HEATING_SYSTEM",
    "SECONDARY_HEATING_SYSTEM",
    "MAIN_FUEL_TYPE",
    "DHW_SUPPLY_SYSTEM",
    "VENTILATION_SYSTEM",
    # floor
    "FLOOR_INSULATION_TYPE",
    "FLOOR_BOUNDARY_TYPE",
    # walls
    "WALL_TYPE",
    "WALL_INSULATION_MODEL",
    # roof
    "ROOF_CLASS",
    "ROOF_INSULATION_TYPE",
    # glazing
    "glazing_type",
    # general details
    "PROPERTY_TYPE",
    "BUILT_FORM",
    "sap_band_letter",
]
# eq=False keeps identity-based equality/hashing: a generated __eq__ would
# compare DataFrames element-wise and raise on ambiguous truth values.
@dataclass(eq=False)
class SAPTables:
    """Bundle of SAP reference tables used by the feature-engineering steps.

    Construct it directly from four DataFrames, or use
    :meth:`from_local_dir` to read them from Excel workbooks on disk.
    """

    s3: pd.DataFrame  # read from "S3_sap.xlsx" by from_local_dir
    walls_u: pd.DataFrame  # read from "external_wall_u_values2.xlsx"
    s9: pd.DataFrame  # read from "SAP_Table_ROOF_S9.xlsx"
    s10: pd.DataFrame  # read from "SAP_Table_ROOF_S10.xlsx"

    @classmethod
    def from_local_dir(cls, base_dir: str) -> "SAPTables":
        """Load all four tables from the Excel files inside *base_dir*.

        Args:
            base_dir: Directory containing ``S3_sap.xlsx``,
                ``external_wall_u_values2.xlsx``, ``SAP_Table_ROOF_S9.xlsx``
                and ``SAP_Table_ROOF_S10.xlsx``.

        Returns:
            A ``SAPTables`` instance holding one DataFrame per workbook.

        Raises:
            Whatever ``pandas.read_excel`` raises for a missing or unreadable
            file; errors are not caught here.
        """
        base = Path(base_dir)
        return cls(
            s3=pd.read_excel(base / "S3_sap.xlsx"),
            walls_u=pd.read_excel(base / "external_wall_u_values2.xlsx"),
            s9=pd.read_excel(base / "SAP_Table_ROOF_S9.xlsx"),
            s10=pd.read_excel(base / "SAP_Table_ROOF_S10.xlsx"),
        )
class EPCFeatureEngineer:
    """Convert raw EPC records into the model's feature columns.

    The SAP lookup tables are supplied once at construction and reused by
    every call to :meth:`transform`.
    """

    def __init__(self, sap: SAPTables):
        # Tables handed to the floor, wall and roof feature steps.
        self.sap = sap

    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """Run the full feature pipeline on a copy of *df*.

        Steps, in order:
          1. Treat empty strings as missing; default missing FLOOR_HEIGHT to 2.5.
          2. Attach SAP age-band columns.
          3. Apply the window, energy-system, floor, wall and roof steps.
          4. Map the textual heating / hot-water ratings to numbers.
          5. Fill missing categoricals with "UNKNOWN" and cast them to str.

        Returns:
            A frame containing exactly the columns listed in ``features``.
            The input frame is left unmodified.
        """
        tables = self.sap
        out = df.copy()

        # Blank cells count as missing from here on.
        out.replace("", pd.NA, inplace=True)
        out["FLOOR_HEIGHT"] = out["FLOOR_HEIGHT"].fillna(2.5)

        # SAP age bands
        out = age_band_to_sap_letter(out)

        # Envelope and systems; each step receives the previous step's output.
        out = windows_feature_engineering_vectorised(out)
        out = energy_system_feature_engineering_vectorised(out)
        out = floor_feature_engineering_fast(out, tables.s3)
        out = wall_feature_engineering(out, tables.walls_u)
        out = roof_feature_engineering(out, tables.s9, tables.s10)

        # Heating efficiency: missing or unrecognised ratings fall back to 0.75.
        mainheat_rating = out["MAINHEAT_ENERGY_EFF"].str.lower()
        out["MAINHEAT_EFF_NUM"] = mainheat_rating.map(EFF_MAP).fillna(0.75)

        # Hot water efficiency
        hot_water_rating = out["HOT_WATER_ENERGY_EFF"].str.lower()
        out["HOT_WATER_ENERGY_NUM"] = hot_water_rating.map(DHW_EFF_MAP)

        # No usable rating but a known DHW supply system: use 0.78.
        unrated_with_system = (
            out["HOT_WATER_ENERGY_NUM"].isna()
            & out["DHW_SUPPLY_SYSTEM"].notna()
        )
        out.loc[unrated_with_system, "HOT_WATER_ENERGY_NUM"] = 0.78

        # Anything still missing has neither a rating nor a supply system.
        out["HOT_WATER_ENERGY_NUM"] = out["HOT_WATER_ENERGY_NUM"].fillna(0.75)

        # Categoricals
        categoricals = out[cat_cols].fillna("UNKNOWN")
        out[cat_cols] = categoricals.astype(str)

        return out[features]