# epc_only_data_model/src/features/build_features.py
# zcemg08's picture
# upgrade code for retrofit walls and roofs calls
# c831cba
import pandas as pd
from dataclasses import dataclass
from pathlib import Path
from src.features.construction_age_band_sap import normalize_construction_age_band, windows_feature_engineering_vectorised
from src.features.energy_system import energy_system_feature_engineering_vectorised
from src.features.floor import floor_feature_engineering_fast
from src.features.walls import wall_feature_engineering
from src.features.roofs import roof_feature_engineering
def build_age_band_lookup(series: pd.Series):
    """Map every distinct raw age band to its normalised SAP band.

    Args:
        series: Raw EPC ``CONSTRUCTION_AGE_BAND`` values; missing entries
            are ignored.

    Returns:
        A dict keyed by raw value whose items are
        ``(sap_band_letter, sap_band_label)`` tuples, as produced by
        ``normalize_construction_age_band``.
    """
    # Normalise each distinct value once instead of once per row.
    distinct_bands = series.dropna().unique()
    normalised = (
        (raw_band, normalize_construction_age_band(raw_band))
        for raw_band in distinct_bands
    )
    return {
        raw_band: (letter, label)
        for raw_band, (letter, label) in normalised
    }
def age_band_to_sap_letter(df: pd.DataFrame) -> pd.DataFrame:
    """Attach SAP age-band columns derived from ``CONSTRUCTION_AGE_BAND``.

    Args:
        df: Frame containing a ``CONSTRUCTION_AGE_BAND`` column.

    Returns:
        A new frame (the input is not modified) with ``sap_band_letter``
        and ``sap_band_label`` columns joined on ``CONSTRUCTION_AGE_BAND``.
    """
    band_column = "CONSTRUCTION_AGE_BAND"
    sap_columns = ["sap_band_letter", "sap_band_label"]

    # One row per distinct raw band, indexed by the raw value so it can be
    # joined back onto the band column.
    band_table = pd.DataFrame.from_dict(
        build_age_band_lookup(df[band_column]),
        orient="index",
        columns=sap_columns,
    )
    return df.copy().join(band_table, on=band_column)
# Number assigned to each lower-cased main-heating rating label
# (applied to MAINHEAT_ENERGY_EFF in EPCFeatureEngineer.transform).
EFF_MAP = {
    "very poor": 0.60,
    "poor": 0.68,
    "average": 0.75,
    "good": 0.85,
    "very good": 0.92,
}

# Number assigned to each lower-cased hot-water rating label
# (applied to HOT_WATER_ENERGY_EFF in EPCFeatureEngineer.transform).
DHW_EFF_MAP = {
    "very poor": 0.65,
    "poor": 0.72,
    "average": 0.78,
    "good": 0.85,
    "very good": 0.90,
}

# Output columns, grouped by theme. The order here fixes the column order
# of the frame returned by EPCFeatureEngineer.transform.
energy_system_columns = [
    "MAIN_HEATING_SYSTEM",
    "SECONDARY_HEATING_SYSTEM",
    "MAIN_FUEL_TYPE",
    "DHW_SUPPLY_SYSTEM",
    "VENTILATION_SYSTEM",
    "LIGHTING_FRACTION_LOW_ENERGY",
    "PV_KWP",
    "MAINHEAT_EFF_NUM",
    "ROOF_MM_S9",
    "HOT_WATER_ENERGY_NUM",
]

envelop_columns = [
    "FLOOR_U_VALUE",
    "FLOOR_INSULATION_TYPE",
    "FLOOR_BOUNDARY_TYPE",
    "WALL_U_VALUE",
    "WALL_TYPE",
    "WALL_INSULATION_MODEL",
    "ROOF_U_VALUE",
    "ROOF_CLASS",
    "ROOF_INSULATION_TYPE",
    "glazing_area_m2",
    "glazing_type",
]

general_details = [
    "PROPERTY_TYPE",
    "TOTAL_FLOOR_AREA",
    "BUILT_FORM",
    "sap_band_letter",
    "FLOOR_HEIGHT",
]

# Full feature list: energy system, then envelope, then general details.
features = [*energy_system_columns, *envelop_columns, *general_details]

# Subset of the feature columns that transform() fills with "UNKNOWN"
# and casts to str.
cat_cols = [
    "MAIN_HEATING_SYSTEM",
    "SECONDARY_HEATING_SYSTEM",
    "MAIN_FUEL_TYPE",
    "DHW_SUPPLY_SYSTEM",
    "VENTILATION_SYSTEM",
    "FLOOR_INSULATION_TYPE",
    "FLOOR_BOUNDARY_TYPE",
    "WALL_TYPE",
    "WALL_INSULATION_MODEL",
    "ROOF_CLASS",
    "ROOF_INSULATION_TYPE",
    "glazing_type",
    "PROPERTY_TYPE",
    "BUILT_FORM",
    "sap_band_letter",
]
@dataclass
class SAPTables:
    """Bundle of the four lookup tables loaded from Excel workbooks."""

    s3: pd.DataFrame  # read from "S3_sap.xlsx"
    walls_u: pd.DataFrame  # read from "external_wall_u_values2.xlsx"
    s9: pd.DataFrame  # read from "SAP_Table_ROOF_S9.xlsx"
    s10: pd.DataFrame  # read from "SAP_Table_ROOF_S10.xlsx"

    @classmethod
    def from_local_dir(cls, base_dir: str) -> "SAPTables":
        """Load all four workbooks from ``base_dir``.

        Args:
            base_dir: Directory that contains the Excel workbooks.

        Returns:
            A ``SAPTables`` instance with one DataFrame per workbook.
        """
        root = Path(base_dir)
        # Field name -> workbook file name; read in declaration order.
        workbook_names = {
            "s3": "S3_sap.xlsx",
            "walls_u": "external_wall_u_values2.xlsx",
            "s9": "SAP_Table_ROOF_S9.xlsx",
            "s10": "SAP_Table_ROOF_S10.xlsx",
        }
        loaded = {
            field_name: pd.read_excel(root / file_name)
            for field_name, file_name in workbook_names.items()
        }
        return cls(**loaded)
class EPCFeatureEngineer:
    """Build the model feature table from raw EPC records.

    Holds the SAP lookup tables that are passed to the floor, wall and
    roof feature helpers.
    """

    def __init__(self, sap: SAPTables):
        self.sap = sap

    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """Return the engineered feature columns for ``df``.

        The input frame is copied first, so the caller's frame is left
        untouched.

        Args:
            df: Raw EPC records.

        Returns:
            A frame restricted to the columns listed in ``features``,
            in that order.
        """
        frame = df.copy()
        # Empty strings count as missing; a missing floor height becomes 2.5.
        frame.replace("", pd.NA, inplace=True)
        frame["FLOOR_HEIGHT"] = frame["FLOOR_HEIGHT"].fillna(2.5)

        # Ordered feature steps: SAP age bands, windows, energy system,
        # then floor / wall / roof using the SAP lookup tables.
        pipeline = (
            age_band_to_sap_letter,
            windows_feature_engineering_vectorised,
            energy_system_feature_engineering_vectorised,
            lambda data: floor_feature_engineering_fast(data, self.sap.s3),
            lambda data: wall_feature_engineering(data, self.sap.walls_u),
            lambda data: roof_feature_engineering(
                data, self.sap.s9, self.sap.s10
            ),
        )
        for step in pipeline:
            frame = step(frame)

        # Heating efficiency: unrecognised or missing ratings get 0.75.
        heating_rating = frame["MAINHEAT_ENERGY_EFF"].str.lower()
        frame["MAINHEAT_EFF_NUM"] = heating_rating.map(EFF_MAP).fillna(0.75)

        # Hot water efficiency: rows without a usable rating get 0.78 when a
        # DHW supply system is recorded, otherwise 0.75.
        dhw_rating = frame["HOT_WATER_ENERGY_EFF"].str.lower()
        frame["HOT_WATER_ENERGY_NUM"] = dhw_rating.map(DHW_EFF_MAP)
        unrated = frame["HOT_WATER_ENERGY_NUM"].isna()
        has_dhw_system = frame["DHW_SUPPLY_SYSTEM"].notna()
        frame.loc[unrated & has_dhw_system, "HOT_WATER_ENERGY_NUM"] = 0.78
        frame["HOT_WATER_ENERGY_NUM"] = (
            frame["HOT_WATER_ENERGY_NUM"].fillna(0.75)
        )

        # Categoricals: missing values become the literal "UNKNOWN" string.
        categorical = frame[cat_cols].fillna("UNKNOWN")
        frame[cat_cols] = categorical.astype(str)

        return frame[features]