# Residue from the hosting page this file was copied from:
#   Spaces: Runtime error
#   File size: 4,350 Bytes
#   d11b44e c831cba
import pandas as pd
from dataclasses import dataclass
from pathlib import Path
from src.features.construction_age_band_sap import normalize_construction_age_band, windows_feature_engineering_vectorised
from src.features.energy_system import energy_system_feature_engineering_vectorised
from src.features.floor import floor_feature_engineering_fast
from src.features.walls import wall_feature_engineering
from src.features.roofs import roof_feature_engineering
def build_age_band_lookup(series: pd.Series):
    """
    Build a lookup from raw age-band values to their SAP band.

    Every distinct, non-null value in ``series`` is passed once to
    ``normalize_construction_age_band`` and the resulting pair is stored
    under the raw value:

    raw EPC CONSTRUCTION_AGE_BAND -> (sap_band_letter, sap_band_label)
    """

    def _sap_pair(raw_band):
        # Unpack explicitly so a result that is not exactly two items
        # fails here rather than further down the pipeline.
        sap_letter, sap_label = normalize_construction_age_band(raw_band)
        return (sap_letter, sap_label)

    distinct_bands = series.dropna().unique()
    return {raw_band: _sap_pair(raw_band) for raw_band in distinct_bands}
def age_band_to_sap_letter(df: pd.DataFrame) -> pd.DataFrame:
    """
    Return a copy of ``df`` with SAP age-band columns appended.

    The lookup built from ``df["CONSTRUCTION_AGE_BAND"]`` is turned into a
    two-column frame (``sap_band_letter``, ``sap_band_label``) indexed by the
    raw band value and joined back onto the copy. The caller's frame is left
    untouched.
    """
    result = df.copy()
    band_lookup = build_age_band_lookup(result["CONSTRUCTION_AGE_BAND"])

    sap_bands = pd.DataFrame.from_dict(
        band_lookup,
        orient="index",
        columns=["sap_band_letter", "sap_band_label"],
    )

    # Joining "on" the raw column matches each row's value against the
    # lookup frame's index.
    return result.join(sap_bands, on="CONSTRUCTION_AGE_BAND")
# Lower-cased rating label -> numeric value for the main heating system.
# EPCFeatureEngineer.transform maps MAINHEAT_ENERGY_EFF through this table to
# produce MAINHEAT_EFF_NUM.
EFF_MAP = {
    "very poor": 0.60,
    "poor": 0.68,
    "average": 0.75,
    "good": 0.85,
    "very good": 0.92,
}
# Lower-cased rating label -> numeric value for hot water.
# EPCFeatureEngineer.transform maps HOT_WATER_ENERGY_EFF through this table to
# produce HOT_WATER_ENERGY_NUM.
DHW_EFF_MAP = {
    "very poor": 0.65,
    "poor": 0.72,
    "average": 0.78,
    "good": 0.85,
    "very good": 0.90,
}
# Output columns, grouped by theme. The order is significant: ``features`` is
# the concatenation of the three groups and fixes the column order of the
# frame returned by EPCFeatureEngineer.transform.
energy_system_columns = [
    "MAIN_HEATING_SYSTEM",
    "SECONDARY_HEATING_SYSTEM",
    "MAIN_FUEL_TYPE",
    "DHW_SUPPLY_SYSTEM",
    "VENTILATION_SYSTEM",
    "LIGHTING_FRACTION_LOW_ENERGY",
    "PV_KWP",
    "MAINHEAT_EFF_NUM",
    "ROOF_MM_S9",
    "HOT_WATER_ENERGY_NUM",
]

envelop_columns = [
    # Floor
    "FLOOR_U_VALUE",
    "FLOOR_INSULATION_TYPE",
    "FLOOR_BOUNDARY_TYPE",
    # Walls
    "WALL_U_VALUE",
    "WALL_TYPE",
    "WALL_INSULATION_MODEL",
    # Roof
    "ROOF_U_VALUE",
    "ROOF_CLASS",
    "ROOF_INSULATION_TYPE",
    # Glazing
    "glazing_area_m2",
    "glazing_type",
]

general_details = [
    "PROPERTY_TYPE",
    "TOTAL_FLOOR_AREA",
    "BUILT_FORM",
    "sap_band_letter",
    "FLOOR_HEIGHT",
]

features = [*energy_system_columns, *envelop_columns, *general_details]
# Columns that EPCFeatureEngineer.transform fills with "UNKNOWN" where missing
# and converts to ``str``.
cat_cols = [
    # Energy system
    "MAIN_HEATING_SYSTEM",
    "SECONDARY_HEATING_SYSTEM",
    "MAIN_FUEL_TYPE",
    "DHW_SUPPLY_SYSTEM",
    "VENTILATION_SYSTEM",
    # Envelope
    "FLOOR_INSULATION_TYPE",
    "FLOOR_BOUNDARY_TYPE",
    "WALL_TYPE",
    "WALL_INSULATION_MODEL",
    "ROOF_CLASS",
    "ROOF_INSULATION_TYPE",
    "glazing_type",
    # General details
    "PROPERTY_TYPE",
    "BUILT_FORM",
    "sap_band_letter",
]
@dataclass
class SAPTables:
    """Bundle of the lookup tables that the feature steps read from.

    Instances can be built directly from four DataFrames or loaded from a
    directory of Excel workbooks with :meth:`from_local_dir`.
    """

    # Loaded from "S3_sap.xlsx"; passed to the floor feature step.
    s3: pd.DataFrame
    # Loaded from "external_wall_u_values2.xlsx"; passed to the wall step.
    walls_u: pd.DataFrame
    # Loaded from "SAP_Table_ROOF_S9.xlsx"; passed to the roof step.
    s9: pd.DataFrame
    # Loaded from "SAP_Table_ROOF_S10.xlsx"; passed to the roof step.
    s10: pd.DataFrame

    @classmethod
    def from_local_dir(cls, base_dir: str) -> "SAPTables":
        """Read the four workbooks found directly under ``base_dir``.

        Each workbook is read with ``pd.read_excel`` using its default
        arguments; any error raised by that call propagates to the caller.
        """
        root = Path(base_dir)
        # Field name -> workbook file name, in the order the files are read.
        workbook_names = {
            "s3": "S3_sap.xlsx",
            "walls_u": "external_wall_u_values2.xlsx",
            "s9": "SAP_Table_ROOF_S9.xlsx",
            "s10": "SAP_Table_ROOF_S10.xlsx",
        }
        tables = {
            field_name: pd.read_excel(root / file_name)
            for field_name, file_name in workbook_names.items()
        }
        return cls(**tables)
class EPCFeatureEngineer:
    """Turn raw EPC records into the fixed feature frame listed in ``features``.

    The SAP lookup tables are supplied once at construction time and reused
    for every call to :meth:`transform`.
    """

    def __init__(self, sap: "SAPTables"):
        """Keep a reference to the SAP lookup tables used by the feature steps."""
        self.sap = sap

    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """Derive the feature columns from ``df`` and return them.

        The input frame is not modified; all work is done on a copy.

        Args:
            df: Raw EPC records. This method reads FLOOR_HEIGHT,
                MAINHEAT_ENERGY_EFF and HOT_WATER_ENERGY_EFF directly, and
                CONSTRUCTION_AGE_BAND through ``age_band_to_sap_letter``.
                The feature-engineering helpers read further columns that
                are not visible from here.

        Returns:
            A DataFrame holding exactly the columns in ``features``, in that
            order. Columns in ``cat_cols`` are ``str`` with "UNKNOWN" in
            place of missing values.

        Raises:
            KeyError: If a column accessed here is absent from the frame.
        """
        df = df.copy()
        # Empty strings are treated as missing values from here on.
        df.replace("", pd.NA, inplace=True)
        df["FLOOR_HEIGHT"] = df["FLOOR_HEIGHT"].fillna(2.5)

        # SAP age bands
        df = age_band_to_sap_letter(df)

        # Envelope and energy-system features; each step returns the frame
        # that the next one receives.
        df = windows_feature_engineering_vectorised(df)
        df = energy_system_feature_engineering_vectorised(df)
        df = floor_feature_engineering_fast(df, self.sap.s3)
        df = wall_feature_engineering(df, self.sap.walls_u)
        df = roof_feature_engineering(df, self.sap.s9, self.sap.s10)

        # Heating efficiency: ratings that are missing or not in EFF_MAP fall
        # back to 0.75, the same value EFF_MAP assigns to "average".
        # NOTE(review): .str requires a string/object column; a column that
        # is entirely numeric NaN would raise here - confirm upstream dtype.
        df["MAINHEAT_EFF_NUM"] = (
            df["MAINHEAT_ENERGY_EFF"]
            .str.lower()
            .map(EFF_MAP)
            .fillna(0.75)
        )

        # Hot water efficiency
        df["HOT_WATER_ENERGY_NUM"] = (
            df["HOT_WATER_ENERGY_EFF"]
            .str.lower()
            .map(DHW_EFF_MAP)
        )
        # Two-tier fallback: rows that have a DHW supply system but no usable
        # rating get 0.78 (the DHW_EFF_MAP "average" value); whatever is still
        # missing afterwards gets 0.75.
        # NOTE(review): DHW_SUPPLY_SYSTEM is presumably produced by the
        # energy-system step above - confirm.
        needs_dhw_default = (
            df["HOT_WATER_ENERGY_NUM"].isna()
            & df["DHW_SUPPLY_SYSTEM"].notna()
        )
        df.loc[needs_dhw_default, "HOT_WATER_ENERGY_NUM"] = 0.78
        df["HOT_WATER_ENERGY_NUM"] = df["HOT_WATER_ENERGY_NUM"].fillna(0.75)

        # Categoricals
        df[cat_cols] = df[cat_cols].fillna("UNKNOWN").astype(str)

        return df[features]