zcemg08 committed on
Commit
c831cba
·
1 Parent(s): e103a30

upgrade code for retrofit walls and roofs calls

Browse files
src/features/build_features.py CHANGED
@@ -66,7 +66,7 @@ energy_system_columns = [
66
 
67
  envelop_columns = [
68
  "FLOOR_U_VALUE","FLOOR_INSULATION_TYPE","FLOOR_BOUNDARY_TYPE",
69
- "WALL_U_VALUE","WALL_TYPE","WALL_INSULATION",
70
  "ROOF_U_VALUE","ROOF_CLASS","ROOF_INSULATION_TYPE",
71
  "glazing_area_m2","glazing_type"
72
  ]
@@ -83,7 +83,7 @@ cat_cols = [
83
  "MAIN_HEATING_SYSTEM","SECONDARY_HEATING_SYSTEM",
84
  "MAIN_FUEL_TYPE","DHW_SUPPLY_SYSTEM","VENTILATION_SYSTEM",
85
  "FLOOR_INSULATION_TYPE","FLOOR_BOUNDARY_TYPE",
86
- "WALL_TYPE","WALL_INSULATION",
87
  "ROOF_CLASS","ROOF_INSULATION_TYPE",
88
  "glazing_type",
89
  "PROPERTY_TYPE","BUILT_FORM","sap_band_letter"
 
66
 
67
  envelop_columns = [
68
  "FLOOR_U_VALUE","FLOOR_INSULATION_TYPE","FLOOR_BOUNDARY_TYPE",
69
+ "WALL_U_VALUE","WALL_TYPE","WALL_INSULATION_MODEL",
70
  "ROOF_U_VALUE","ROOF_CLASS","ROOF_INSULATION_TYPE",
71
  "glazing_area_m2","glazing_type"
72
  ]
 
83
  "MAIN_HEATING_SYSTEM","SECONDARY_HEATING_SYSTEM",
84
  "MAIN_FUEL_TYPE","DHW_SUPPLY_SYSTEM","VENTILATION_SYSTEM",
85
  "FLOOR_INSULATION_TYPE","FLOOR_BOUNDARY_TYPE",
86
+ "WALL_TYPE","WALL_INSULATION_MODEL",
87
  "ROOF_CLASS","ROOF_INSULATION_TYPE",
88
  "glazing_type",
89
  "PROPERTY_TYPE","BUILT_FORM","sap_band_letter"
src/features/roofs.py CHANGED
@@ -313,6 +313,11 @@ def extract_roof_insulation(row):
313
  return "unknown"
314
 
315
 
 
 
 
 
 
316
 
317
  def build_roof_lookup(roof_desc: pd.Series) -> pd.DataFrame:
318
  """
@@ -356,6 +361,34 @@ def build_roof_lookup(roof_desc: pd.Series) -> pd.DataFrame:
356
  mm = s.str.extract(r"(\d+)\s*\+?\s*mm", expand=False)
357
  out["ROOF_MM_RAW"] = pd.to_numeric(mm, errors="coerce")
358
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
359
  # ---------------------------
360
  # NORMALISE TO SAP S9 MM
361
  # ---------------------------
 
313
  return "unknown"
314
 
315
 
316
+ S9_MM = np.array([0, 12, 25, 50, 75, 100, 150, 200, 250, 270, 300, 350, 400])
317
+ S9_U = np.array([2.3, 1.5, 1.0, 0.68, 0.50, 0.40, 0.30, 0.21, 0.17, 0.16, 0.14, 0.12, 0.11])
318
+
319
+ S9_LOOKUP = dict(zip(S9_MM, S9_U))
320
+
321
 
322
  def build_roof_lookup(roof_desc: pd.Series) -> pd.DataFrame:
323
  """
 
361
  mm = s.str.extract(r"(\d+)\s*\+?\s*mm", expand=False)
362
  out["ROOF_MM_RAW"] = pd.to_numeric(mm, errors="coerce")
363
 
364
+ # ---------------------------
365
+ # APPLY RETROFIT TO MEASURED U-VALUES
366
+ # ---------------------------
367
+
368
+ mask_measured_upgrade = (
369
+ out["ROOF_MEASURED_U"].notna() &
370
+ out["ROOF_MM_RAW"].notna()
371
+ )
372
+
373
+ if mask_measured_upgrade.any():
374
+ u_meas = out.loc[mask_measured_upgrade, "ROOF_MEASURED_U"].values
375
+ mm_add = out.loc[mask_measured_upgrade, "ROOF_MM_RAW"].astype(int).values
376
+
377
+ # inverse S9 (nearest)
378
+ diff = np.abs(u_meas[:, None] - S9_U[None, :])
379
+ base_mm = S9_MM[diff.argmin(axis=1)]
380
+
381
+ # add retrofit + clip
382
+ new_mm = np.minimum(base_mm + mm_add, 400)
383
+
384
+ # forward S9 lookup
385
+ out.loc[mask_measured_upgrade, "ROOF_MEASURED_U"] = S9_U[
386
+ np.searchsorted(S9_MM, new_mm)
387
+ ]
388
+
389
+ # zero out insulation thickness for measured U-value rows
390
+ out.loc[out["ROOF_CLASS"] == "measured", "ROOF_MM_RAW"] = pd.NA
391
+
392
  # ---------------------------
393
  # NORMALISE TO SAP S9 MM
394
  # ---------------------------
src/features/walls.py CHANGED
@@ -6,31 +6,61 @@ import numpy as np
6
  def extract_wall_u_from_text(text: str | float | None) -> float | None:
7
  """
8
  Extract numeric U-value from WALLS_DESCRIPTION when it contains
9
- 'Average thermal transmittance ...', which corresponds to the
10
- average external wall U-value in SAP/RdSAP EPCs.
11
 
12
- If no numeric value is found or it looks invalid (< 0.05),
13
- return None and let Stage 2 handle it.
 
 
 
 
 
14
  """
15
  if pd.isna(text):
16
  return None
17
 
18
  s = str(text).lower()
 
19
  if "average thermal transmittance" not in s:
20
  return None
21
 
22
- # Find first number in the string (handles '0.30', '1.4', etc.)
 
 
23
  nums = re.findall(r"([0-9]*\.?[0-9]+)", s)
24
  if not nums:
25
  return None
26
 
27
- u = float(nums[0])
28
 
29
- # EPC sometimes has '0.00' for missing; treat as unknown
30
- if u < 0.05:
31
  return None
32
 
33
- return u
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
 
35
 
36
 
@@ -368,9 +398,24 @@ def wall_feature_engineering(
368
  # ------------------------------------------------------------
369
  df["WALL_U_VALUE"] = df["WALL_U_MEASURED"].combine_first(df["WALL_U_TABLE"])
370
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
371
  # ------------------------------------------------------------
372
  # 5. Optional clean-up
373
  # ------------------------------------------------------------
374
- df.drop(columns=["WALL_U_TABLE"], inplace=True, errors="ignore")
375
 
376
  return df
 
6
  def extract_wall_u_from_text(text: str | float | None) -> float | None:
7
  """
8
  Extract numeric U-value from WALLS_DESCRIPTION when it contains
9
+ 'Average thermal transmittance ...'.
 
10
 
11
+ Supports optional insulation thickness suffix:
12
+ '..., 0 mm'
13
+ '..., 50 mm'
14
+ '..., 100 mm'
15
+ etc.
16
+
17
+ If insulation is present, applies R-addition.
18
  """
19
  if pd.isna(text):
20
  return None
21
 
22
  s = str(text).lower()
23
+
24
  if "average thermal transmittance" not in s:
25
  return None
26
 
27
+ # ------------------------------------------------------------
28
+ # 1. Extract baseline U-value
29
+ # ------------------------------------------------------------
30
  nums = re.findall(r"([0-9]*\.?[0-9]+)", s)
31
  if not nums:
32
  return None
33
 
34
+ u_base = float(nums[0])
35
 
36
+ # EPC sometimes has '0.00' for missing
37
+ if u_base < 0.05:
38
  return None
39
 
40
+ # ------------------------------------------------------------
41
+ # 2. Extract insulation thickness (mm), default = 0 mm
42
+ # ------------------------------------------------------------
43
+ mm_match = re.search(r"(\d+)\s*mm", s)
44
+ mm = int(mm_match.group(1)) if mm_match else 0
45
+
46
+ # ------------------------------------------------------------
47
+ # 3. Apply R-addition if insulation present
48
+ # ------------------------------------------------------------
49
+ R_INS_MAP = {
50
+ 0: 0.0,
51
+ 50: 1.4,
52
+ 100: 2.8,
53
+ 150: 4.2,
54
+ 200: 5.6,
55
+ }
56
+
57
+ R_ins = R_INS_MAP.get(mm, 0.0)
58
+
59
+ if R_ins > 0:
60
+ R_old = 1.0 / u_base
61
+ return 1.0 / (R_old + R_ins)
62
+
63
+ return u_base
64
 
65
 
66
 
 
398
  # ------------------------------------------------------------
399
  df["WALL_U_VALUE"] = df["WALL_U_MEASURED"].combine_first(df["WALL_U_TABLE"])
400
 
401
+ # ------------------------------------------------------------
402
+ # 4.5 Vectorised insulation collapse for ML model
403
+ # ------------------------------------------------------------
404
+
405
+ # Start with default = insulated
406
+ df["WALL_INSULATION_MODEL"] = "insulated"
407
+
408
+ # as built → as built
409
+ mask_as_built = df["WALL_INSULATION"].isin(["as built"])
410
+ df.loc[mask_as_built, "WALL_INSULATION_MODEL"] = "as built"
411
+
412
+ # unknown / NaN → unknown
413
+ mask_unknown = df["WALL_INSULATION"].isna() | df["WALL_INSULATION"].isin(["unknown"])
414
+ df.loc[mask_unknown, "WALL_INSULATION_MODEL"] = "unknown"
415
+
416
  # ------------------------------------------------------------
417
  # 5. Optional clean-up
418
  # ------------------------------------------------------------
419
+ df.drop(columns=["WALL_U_TABLE","WALL_INSULATION"], inplace=True, errors="ignore")
420
 
421
  return df